From: Seung-Woo Kim
Date: Tue, 1 Dec 2020 09:57:43 +0000 (+0900)
Subject: gpu/arm: Remove duplicated mali midgard driver
X-Git-Tag: submit/tizen/20201211.030542~28
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=dbaec0b72d5fc5a742ca3ce1469b35c5d934cf79;p=platform%2Fkernel%2Flinux-amlogic.git

gpu/arm: Remove duplicated mali midgard driver

In the tree, there are two duplicated mali midgard drivers, in drivers/gpu/arm/midgard and drivers/gpu/drm/bifrost/midgard, with different versions. There is no reason to keep the old release version, so keep only the newer version, r16p0 (11.13), in drivers/gpu/arm/midgard. Also, the config option for choosing a version is removed.

Signed-off-by: Seung-Woo Kim
---

diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index 9d3ab611dc09..89ba6ce66393 100755 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -21,7 +21,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r12p0-01rel0" +MALI_RELEASE_NAME ?= "r16p0-01rel0" # Paths required for build KBASE_PATH = $(src) @@ -30,32 +30,20 @@ UMP_PATH = $(src)/../../../base # Set up defaults if not defined by build system MALI_CUSTOMER_RELEASE ?= 1 +MALI_USE_CSF ?= 0 MALI_UNIT_TEST ?= 0 MALI_KERNEL_TEST_API ?= 0 -MALI_MOCK_TEST ?= 0 MALI_COVERAGE ?= 0 CONFIG_MALI_PLATFORM_NAME ?= "devicetree" -# This workaround is for what seems to be a compiler bug we observed in -# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling -# the "_Pragma" syntax, where an error message is returned: -# -# "internal compiler error: unspellable token PRAGMA" -# -# This regression has thus far only been seen on the GCC 4.7 compiler bundled -# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds -# which are not known to be used with AOSP, is hardcoded to disable the -# workaround, i.e. set the define to 0.
-MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0 # Set up our defines, which will be passed to gcc DEFINES = \ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_USE_CSF=$(MALI_USE_CSF) \ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ - -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \ -DMALI_COVERAGE=$(MALI_COVERAGE) \ - -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ - -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598) + -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" ifeq ($(KBUILD_EXTMOD),) # in-tree @@ -66,11 +54,13 @@ DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME endif DEFINES += -I$(srctree)/drivers/staging/android -#ldflags-y += --strip-debug +ldflags-y += --strip-debug + +DEFINES += -DMALI_KBASE_BUILD # Use our defines when compiling ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux -subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux +subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux SRC := \ mali_kbase_device.c \ @@ -89,12 +79,16 @@ SRC := \ mali_kbase_pm.c \ mali_kbase_config.c \ mali_kbase_vinstr.c \ + mali_kbase_hwcnt.c \ + mali_kbase_hwcnt_backend_gpu.c \ + mali_kbase_hwcnt_gpu.c \ + mali_kbase_hwcnt_legacy.c \ + mali_kbase_hwcnt_types.c \ + mali_kbase_hwcnt_virtualizer.c \ mali_kbase_softjobs.c \ mali_kbase_10969_workaround.c \ mali_kbase_hw.c \ - mali_kbase_utility.c \ mali_kbase_debug.c \ - mali_kbase_trace_timeline.c \ mali_kbase_gpu_memory_debugfs.c \ mali_kbase_mem_linux.c \ mali_kbase_core_linux.c \ @@ -116,7 +110,7 @@ SRC := \ thirdparty/mali_kbase_mmap.c -ifeq ($(CONFIG_MALI_JOB_DUMP),y) +ifeq ($(CONFIG_MALI_CINSTR_GWT),y) SRC += mali_kbase_gwt.c endif @@ -149,6 +143,10 @@ ifeq ($(CONFIG_MALI_DEVFREQ),y) endif endif +ifeq ($(MALI_USE_CSF),1) + include $(src)/csf/Kbuild +endif + mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ mali_kbase_dma_fence.o \ mali_kbase_fence.o @@ -160,11 +158,6 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_sync_common.o \ mali_kbase_fence.o -ifeq ($(MALI_MOCK_TEST),1) -# Test functionality -mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o -endif - include $(src)/backend/gpu/Kbuild mali_kbase-y += $(BACKEND:.c=.o) diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig index d9719035471d..7c100165e1ee 100644 --- a/drivers/gpu/arm/midgard/Kconfig +++ b/drivers/gpu/arm/midgard/Kconfig @@ -31,14 +31,12 @@ menuconfig MALI_MIDGARD this will generate a single module, called mali_kbase. config MALI_GATOR_SUPPORT - bool "Streamline support via Gator" + bool "Enable Streamline tracing support" depends on MALI_MIDGARD - default n + default y help - Adds diagnostic support for use with the ARM Streamline Performance Analyzer. - You will need the Gator device driver already loaded before loading this driver when enabling - Streamline debug support. - This is a legacy interface required by older versions of Streamline. + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -58,6 +56,7 @@ config MALI_MIDGARD_ENABLE_TRACE config MALI_DEVFREQ bool "devfreq support for Mali" depends on MALI_MIDGARD && PM_DEVFREQ + default y help Support devfreq for Mali. @@ -107,19 +106,6 @@ config MALI_CORESTACK If unsure, say N. 
-config MALI_PRFCNT_SET_SECONDARY - bool "Use secondary set of performance counters" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Select this option to use secondary set of performance counters. Kernel - features that depend on an access to the primary set of counters may - become unavailable. Enabling this option will prevent power management - from working optimally and may cause instrumentation tools to return - bogus results. - - If unsure, say N. - config MALI_DEBUG bool "Debug build" depends on MALI_MIDGARD && MALI_EXPERT @@ -163,13 +149,6 @@ config MALI_ERROR_INJECT help Enables insertion of errors to test module failure and recovery mechanisms. -config MALI_TRACE_TIMELINE - bool "Timeline tracing" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Enables timeline tracing through the kernel tracepoint system. - config MALI_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_MIDGARD && MALI_EXPERT @@ -180,16 +159,6 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. -config MALI_JOB_DUMP - bool "Enable system level support needed for job dumping" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Choose this option to enable system level support needed for - job dumping. This is typically used for instrumentation but has - minimal overhead when not in use. Enable only if you know what - you are doing. - config MALI_2MB_ALLOC bool "Attempt to allocate 2MB pages" depends on MALI_MIDGARD && MALI_EXPERT @@ -213,5 +182,30 @@ config MALI_PWRSOFT_765 If using kernel >= v4.10 then say N, otherwise if devfreq cooling changes have been backported say Y to avoid compilation errors. +# Instrumentation options. + +config MALI_JOB_DUMP + bool "Enable system level support needed for job dumping" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Choose this option to enable system level support needed for + job dumping. This is typically used for instrumentation but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_PRFCNT_SET_SECONDARY + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, say N. + source "drivers/gpu/arm/midgard/platform/Kconfig" source "drivers/gpu/arm/midgard/tests/Kconfig" diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile index 13af9f473890..08b2fa9f202c 100644 --- a/drivers/gpu/arm/midgard/Makefile +++ b/drivers/gpu/arm/midgard/Makefile @@ -25,10 +25,6 @@ KDIR ?= /lib/modules/$(shell uname -r)/build BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. 
KBASE_PATH_RELATIVE = $(CURDIR) -ifeq ($(MALI_UNIT_TEST), 1) - EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers -endif - ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y) #Add bus logger symbols EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase index d7898cb3d1a5..6b0f81ee76e8 100755 --- a/drivers/gpu/arm/midgard/Makefile.kbase +++ b/drivers/gpu/arm/midgard/Makefile.kbase @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -19,5 +19,5 @@ # # -EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM) +EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM) diff --git a/drivers/gpu/arm/midgard/Mconfig b/drivers/gpu/arm/midgard/Mconfig index 9ad765a05260..46dca1498fe8 100755 --- a/drivers/gpu/arm/midgard/Mconfig +++ b/drivers/gpu/arm/midgard/Mconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -23,15 +23,12 @@ menuconfig MALI_MIDGARD this will generate a single module, called mali_kbase. config MALI_GATOR_SUPPORT - bool "Streamline support via Gator" - depends on MALI_MIDGARD - default y if INSTRUMENTATION_STREAMLINE_OLD - default n + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD && !BACKEND_USER + default y help - Adds diagnostic support for use with the ARM Streamline Performance Analyzer. - You will need the Gator device driver already loaded before loading this driver when enabling - Streamline debug support. - This is a legacy interface required by older versions of Streamline. + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -77,6 +74,7 @@ config MALI_PLATFORM_NAME default "arndale_octa" if PLATFORM_ARNDALE_OCTA default "rk" if PLATFORM_FIREFLY default "hisilicon" if PLATFORM_HIKEY960 + default "hisilicon" if PLATFORM_HIKEY970 default "vexpress" if PLATFORM_VEXPRESS default "devicetree" help @@ -84,10 +82,8 @@ config MALI_PLATFORM_NAME include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must exist. -config MALI_MOCK_TEST - bool - depends on MALI_MIDGARD && !RELEASE - default y + When PLATFORM_CUSTOM is set, this needs to be set manually to + pick up the desired platform files. # MALI_EXPERT configuration options @@ -112,19 +108,6 @@ config MALI_CORESTACK If unsure, say N. -config MALI_PRFCNT_SET_SECONDARY - bool "Use secondary set of performance counters" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Select this option to use secondary set of performance counters. Kernel - features that depend on an access to the primary set of counters may - become unavailable. Enabling this option will prevent power management - from working optimally and may cause instrumentation tools to return - bogus results. - - If unsure, say N. 
- config MALI_DEBUG bool "Debug build" depends on MALI_MIDGARD && MALI_EXPERT @@ -164,13 +147,6 @@ config MALI_ERROR_INJECT_RANDOM help Injected errors are random, rather than user-driven. -config MALI_TRACE_TIMELINE - bool "Timeline tracing" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Enables timeline tracing through the kernel tracepoint system. - config MALI_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_MIDGARD && MALI_EXPERT @@ -206,4 +182,9 @@ config MALI_PWRSOFT_765 not merged in mainline kernel yet. So this define helps to guard those parts of the code. +# Instrumentation options. + +# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. +# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. + source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild index bdf4c5ad53bd..2dc14559c6f9 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/Kbuild +++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2014,2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -30,31 +30,25 @@ BACKEND += \ backend/gpu/mali_kbase_jm_as.c \ backend/gpu/mali_kbase_jm_hw.c \ backend/gpu/mali_kbase_jm_rb.c \ - backend/gpu/mali_kbase_js_affinity.c \ backend/gpu/mali_kbase_js_backend.c \ backend/gpu/mali_kbase_mmu_hw_direct.c \ backend/gpu/mali_kbase_pm_backend.c \ backend/gpu/mali_kbase_pm_driver.c \ backend/gpu/mali_kbase_pm_metrics.c \ backend/gpu/mali_kbase_pm_ca.c \ - backend/gpu/mali_kbase_pm_ca_fixed.c \ backend/gpu/mali_kbase_pm_always_on.c \ backend/gpu/mali_kbase_pm_coarse_demand.c \ - backend/gpu/mali_kbase_pm_demand.c \ backend/gpu/mali_kbase_pm_policy.c \ backend/gpu/mali_kbase_time.c ifeq ($(MALI_CUSTOMER_RELEASE),0) BACKEND += \ - backend/gpu/mali_kbase_pm_ca_random.c \ - backend/gpu/mali_kbase_pm_demand_always_powered.c \ - backend/gpu/mali_kbase_pm_fast_start.c + backend/gpu/mali_kbase_pm_always_on_demand.c endif ifeq ($(CONFIG_MALI_DEVFREQ),y) BACKEND += \ - backend/gpu/mali_kbase_devfreq.c \ - backend/gpu/mali_kbase_pm_ca_devfreq.c + backend/gpu/mali_kbase_devfreq.c endif ifeq ($(CONFIG_MALI_NO_MALI),y) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h index 196a776f7f9d..4a61f96c8c7d 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,8 +27,5 @@ #ifndef _KBASE_BACKEND_CONFIG_H_ #define _KBASE_BACKEND_CONFIG_H_ -/* Enable GPU reset API */ -#define KBASE_GPU_RESET_EN 1 - #endif /* _KBASE_BACKEND_CONFIG_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index 49567f785d2c..7378bfd7b397 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,6 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, kbdev->current_gpu_coherency_mode = mode; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); + kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c index c9c463eb458d..450f6e750a0c 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -152,7 +152,7 @@ bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { kctx->reg_dump[offset+1] = kbase_reg_read(kctx->kbdev, - kctx->reg_dump[offset], NULL); + kctx->reg_dump[offset]); offset += 2; } return true; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index 432c2aad8bd0..5ade0122b5bb 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -148,9 +148,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) } #endif - if (kbdev->pm.backend.ca_current_policy->id == - KBASE_PM_CA_POLICY_ID_DEVFREQ) - kbase_devfreq_set_core_mask(kbdev, core_mask); + kbase_devfreq_set_core_mask(kbdev, core_mask); *target_freq = nominal_freq; kbdev->current_voltage = voltage; @@ -259,6 +257,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) struct device_node *node; int i = 0; int count; + u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; if (!opp_node) return 0; @@ -283,8 +282,17 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) if (of_property_read_u64(node, "opp-hz-real", &real_freq)) real_freq = opp_freq; if (of_property_read_u64(node, "opp-core-mask", &core_mask)) - core_mask = - kbdev->gpu_props.props.raw_props.shader_present; + core_mask = shader_present; + if (core_mask != shader_present && + (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) || + corestack_driver_control || + platform_power_down_only)) { + + dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + opp_freq); + continue; + } + core_count_p = of_get_property(node, "opp-core-count", NULL); if (core_count_p) { u64 remaining_core_mask = diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c index a0dfd81a8089..5dd059fb3420 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,6 +29,7 @@ #include #include +#include #if !defined(CONFIG_MALI_NO_MALI) @@ -154,11 +155,9 @@ void kbase_io_history_dump(struct kbase_device *kbdev) #endif /* CONFIG_DEBUG_FS */ -void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, - struct kbase_context *kctx) +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); writel(value, kbdev->reg + offset); @@ -168,21 +167,15 @@ void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, value, 1); #endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); - - if (kctx && kctx->jctx.tb) - kbase_device_trace_register_access(kctx, REG_WRITE, offset, - value); + dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); } KBASE_EXPORT_TEST_API(kbase_reg_write); -u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, - struct kbase_context *kctx) +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { u32 val; KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); val = readl(kbdev->reg + offset); @@ -192,10 +185,8 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, val, 0); #endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); + dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); - if (kctx && kctx->jctx.tb) - kbase_device_trace_register_access(kctx, REG_READ, offset, val); return val; } @@ -216,11 +207,11 @@ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) u32 status; u64 address; - status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); + status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); address = (u64) kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; address |= kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", status & 0xFF, @@ -230,6 +221,84 @@ static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); } +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) +{ + u32 irq_mask; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->cache_clean_in_progress) { + /* If this is called while another clean is in progress, we + * can't rely on the current one to flush any new changes in + * the cache. Instead, trigger another cache clean immediately + * after this one finishes. 
+ */ + kbdev->cache_clean_queued = true; + return; + } + + /* Enable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + + kbdev->cache_clean_in_progress = true; +} + +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->cache_clean_queued) { + kbdev->cache_clean_queued = false; + + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + } else { + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); + + kbdev->cache_clean_in_progress = false; + + wake_up(&kbdev->cache_clean_wait); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + while (kbdev->cache_clean_in_progress) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wait_event_interruptible(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) { KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); @@ -242,18 +311,29 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) if (val & PRFCNT_SAMPLE_COMPLETED) kbase_instr_hwcnt_sample_done(kbdev); + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); + + /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must + * be called after the IRQ has been cleared. This is because it might + * trigger further power transitions and we don't want to miss the + * interrupt raised to notify us that these further transitions have + * finished. The same applies to kbase_clean_caches_done() - if another + * clean was queued, it might trigger another clean, which might + * generate another interrupt which shouldn't be missed. + */ + if (val & CLEAN_CACHES_COMPLETED) kbase_clean_caches_done(kbdev); - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); - - /* kbase_pm_check_transitions must be called after the IRQ has been - * cleared. This is because it might trigger further power transitions - * and we don't want to miss the interrupt raised to notify us that - * these further transitions have finished. + /* When 'platform_power_down_only' is enabled, the L2 cache is not + * powered down, but flushed before the GPU power down (which is done + * by the platform code). So the L2 state machine requests a cache + * flush. 
And when that flush completes, the L2 state machine needs to + * be re-invoked to proceed with the GPU power down. */ - if (val & POWER_CHANGED_ALL) + if (val & POWER_CHANGED_ALL || + (platform_power_down_only && (val & CLEAN_CACHES_COMPLETED))) kbase_pm_power_changed(kbdev); KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h index 729256ec6ce3..7886e96dd90f 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,30 +34,47 @@ * @kbdev: Kbase device pointer * @offset: Offset of register * @value: Value to write - * @kctx: Kbase context pointer. May be NULL * - * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If - * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr - * != KBASEP_AS_NR_INVALID). + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). */ -void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, - struct kbase_context *kctx); +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); /** * kbase_reg_read - read from GPU register * @kbdev: Kbase device pointer * @offset: Offset of register - * @kctx: Kbase context pointer. May be NULL * - * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If - * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr - * != KBASEP_AS_NR_INVALID). + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). * * Return: Value in desired register */ -u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, - struct kbase_context *kctx); +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); +/** + * kbase_gpu_start_cache_clean - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. This function will + * take hwaccess_lock. + */ +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean_nolock - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. hwaccess_lock + * must be held by the caller. + */ +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish + * @kbdev: Kbase device + * + * This function will take hwaccess_lock, and may sleep. + */ +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); /** * kbase_gpu_interrupt - GPU interrupt handler diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c index 881d50c14af8..995d34da0c6f 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,7 +56,7 @@ int kbase_backend_early_init(struct kbase_device *kbdev) if (err) goto fail_interrupts; - err = kbase_hwaccess_pm_init(kbdev); + err = kbase_hwaccess_pm_early_init(kbdev); if (err) goto fail_pm; @@ -74,7 +74,7 @@ fail_runtime_pm: void kbase_backend_early_term(struct kbase_device *kbdev) { - kbase_hwaccess_pm_term(kbdev); + kbase_hwaccess_pm_early_term(kbdev); kbase_release_interrupts(kbdev); kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); @@ -84,10 +84,14 @@ int kbase_backend_late_init(struct kbase_device *kbdev) { int err; - err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + err = kbase_hwaccess_pm_late_init(kbdev); if (err) return err; + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + goto fail_pm_powerup; + err = kbase_backend_timer_init(kbdev); if (err) goto fail_timer; @@ -121,6 +125,8 @@ fail_interrupt_test: kbase_backend_timer_term(kbdev); fail_timer: kbase_hwaccess_pm_halt(kbdev); +fail_pm_powerup: + kbase_hwaccess_pm_late_term(kbdev); return err; } @@ -131,5 +137,5 @@ void kbase_backend_late_term(struct kbase_device *kbdev) kbase_job_slot_term(kbdev); kbase_backend_timer_term(kbdev); kbase_hwaccess_pm_halt(kbdev); + kbase_hwaccess_pm_late_term(kbdev); } - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index 8809ab0bed5b..39773e6e63aa 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -37,62 +37,61 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, int i; /* Fill regdump with the content of the relevant registers */ - regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL); + regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); regdump->l2_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_FEATURES), NULL); + GPU_CONTROL_REG(L2_FEATURES)); regdump->core_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CORE_FEATURES), NULL); + GPU_CONTROL_REG(CORE_FEATURES)); regdump->tiler_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_FEATURES), NULL); + GPU_CONTROL_REG(TILER_FEATURES)); regdump->mem_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MEM_FEATURES), NULL); + GPU_CONTROL_REG(MEM_FEATURES)); regdump->mmu_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MMU_FEATURES), NULL); + GPU_CONTROL_REG(MMU_FEATURES)); regdump->as_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(AS_PRESENT), NULL); + GPU_CONTROL_REG(AS_PRESENT)); regdump->js_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_PRESENT), NULL); + GPU_CONTROL_REG(JS_PRESENT)); for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) regdump->js_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL); + GPU_CONTROL_REG(JS_FEATURES_REG(i))); for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) regdump->texture_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); + GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); regdump->thread_max_threads = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL); + GPU_CONTROL_REG(THREAD_MAX_THREADS)); regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE), - NULL); + GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); 
regdump->thread_max_barrier_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); + GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); regdump->thread_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_FEATURES), NULL); + GPU_CONTROL_REG(THREAD_FEATURES)); regdump->thread_tls_alloc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_TLS_ALLOC), NULL); + GPU_CONTROL_REG(THREAD_TLS_ALLOC)); regdump->shader_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); + GPU_CONTROL_REG(SHADER_PRESENT_LO)); regdump->shader_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL); + GPU_CONTROL_REG(SHADER_PRESENT_HI)); regdump->tiler_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_LO), NULL); + GPU_CONTROL_REG(TILER_PRESENT_LO)); regdump->tiler_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_HI), NULL); + GPU_CONTROL_REG(TILER_PRESENT_HI)); regdump->l2_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_LO), NULL); + GPU_CONTROL_REG(L2_PRESENT_LO)); regdump->l2_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_HI), NULL); + GPU_CONTROL_REG(L2_PRESENT_HI)); regdump->stack_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_LO), NULL); + GPU_CONTROL_REG(STACK_PRESENT_LO)); regdump->stack_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_HI), NULL); + GPU_CONTROL_REG(STACK_PRESENT_HI)); } void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, @@ -103,7 +102,7 @@ void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, kbase_pm_register_access_enable(kbdev); regdump->coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + GPU_CONTROL_REG(COHERENCY_FEATURES)); /* We're done accessing the GPU registers for now. */ kbase_pm_register_access_disable(kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 77d71f5b9566..79c04d9abaef 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,52 +33,16 @@ #include #include -/** - * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to - * hardware - * - * @kbdev: Kbase device - */ -static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) -{ - unsigned long flags; - unsigned long pm_flags; - u32 irq_mask; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_REQUEST_CLEAN); - - /* Enable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask | CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - /* clean&invalidate the caches so we're sure the mmu tables for the dump - * buffer is valid */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES, NULL); - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -} - int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_enable *enable) + struct kbase_instr_hwcnt_enable *enable) { - unsigned long flags, pm_flags; + unsigned long flags; int err = -EINVAL; u32 irq_mask; - int ret; - u64 shader_cores_needed; u32 prfcnt_config; - shader_cores_needed = kbase_pm_get_present_cores(kbdev, - KBASE_PM_CORE_SHADER); + lockdep_assert_held(&kbdev->hwaccess_lock); /* alignment failure */ if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) @@ -88,53 +52,30 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, */ kbase_pm_ca_instr_enable(kbdev); - /* Request the cores early on synchronously - we'll release them on any - * errors (e.g. 
instrumentation already active) */ - kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_unrequest_cores; + goto out_err; } /* Enable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | - PRFCNT_SAMPLE_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + PRFCNT_SAMPLE_COMPLETED); /* In use, this context is the owner */ kbdev->hwcnt.kctx = kctx; /* Remember the dump address so we can reprogram it later */ kbdev->hwcnt.addr = enable->dump_buffer; - - /* Request the clean */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; - kbdev->hwcnt.backend.triggered = 0; - /* Clean&invalidate the caches so we're sure the mmu tables for the dump - * buffer is valid */ - ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, - &kbdev->hwcnt.backend.cache_clean_work); - KBASE_DEBUG_ASSERT(ret); + kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - /* Wait for cacheclean to complete */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_IDLE); - - kbase_pm_request_l2_caches(kbdev); - /* Configure */ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + if (enable->use_secondary) { u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) @@ -144,38 +85,36 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, if (arch_v6) prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; } -#endif kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_OFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - enable->dump_buffer & 0xFFFFFFFF, kctx); + enable->dump_buffer & 0xFFFFFFFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - enable->dump_buffer >> 32, kctx); + enable->dump_buffer >> 32); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - enable->jm_bm, kctx); + enable->jm_bm); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - enable->shader_bm, kctx); + enable->shader_bm); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - enable->mmu_l2_bm, kctx); + enable->mmu_l2_bm); /* Due to PRLAM-8186 we need to disable the Tiler before we enable the * HW counter dump. 
*/ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, - kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0); else kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm, kctx); + enable->tiler_bm); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm, kctx); + enable->tiler_bm); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); @@ -189,10 +128,6 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); return err; - out_unrequest_cores: - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); out_err: return err; } @@ -205,17 +140,20 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) struct kbase_device *kbdev = kctx->kbdev; while (1) { + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is not enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); goto out; } if (kbdev->hwcnt.kctx != kctx) { /* Instrumentation has been setup for another context */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); goto out; } @@ -223,6 +161,7 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) break; spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); /* Ongoing dump/setup - wait for its completion */ wait_event(kbdev->hwcnt.backend.wait, @@ -233,26 +172,21 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) kbdev->hwcnt.backend.triggered = 0; /* Disable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + irq_mask & ~PRFCNT_SAMPLE_COMPLETED); /* Disable the counters */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); kbdev->hwcnt.kctx = NULL; kbdev->hwcnt.addr = 0ULL; + kbdev->hwcnt.addr_bytes = 0ULL; kbase_pm_ca_instr_disable(kbdev); - kbase_pm_unrequest_cores(kbdev, true, - kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); - - kbase_pm_release_l2_caches(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); @@ -290,15 +224,15 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) /* Reconfigure the dump address */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); + kbdev->hwcnt.addr & 0xFFFFFFFF); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - kbdev->hwcnt.addr 
>> 32, NULL); + kbdev->hwcnt.addr >> 32); /* Start dumping */ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, kbdev->hwcnt.addr, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_SAMPLE, kctx); + GPU_COMMAND_PRFCNT_SAMPLE); dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); @@ -337,33 +271,34 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); void kbasep_cache_clean_worker(struct work_struct *data) { struct kbase_device *kbdev; - unsigned long flags; + unsigned long flags, pm_flags; kbdev = container_of(data, struct kbase_device, hwcnt.backend.cache_clean_work); - mutex_lock(&kbdev->cacheclean_lock); - kbasep_instr_hwcnt_cacheclean(kbdev); - + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Wait for our condition, and any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - wait_event(kbdev->hwcnt.backend.cache_clean_wait, - kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - } + + /* Clean and invalidate the caches so we're sure the mmu tables for the + * dump buffer is valid. + */ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_CLEANED); + KBASE_INSTR_STATE_REQUEST_CLEAN); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + kbase_gpu_wait_cache_clean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); /* All finished and idle */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - mutex_unlock(&kbdev->cacheclean_lock); } void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) @@ -376,53 +311,32 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { - int ret; - /* Always clean and invalidate the cache after a successful dump - */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; - ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, - &kbdev->hwcnt.backend.cache_clean_work); - KBASE_DEBUG_ASSERT(ret); + if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else { + int ret; + /* Always clean and invalidate the cache after a successful dump + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } -void kbase_clean_caches_done(struct kbase_device *kbdev) -{ - u32 irq_mask; - - if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { - unsigned long flags; - unsigned long pm_flags; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - /* Disable interrupt */ - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - 
irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - /* Wakeup... */ - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { - /* Only wake if we weren't resetting */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; - wake_up(&kbdev->hwcnt.backend.cache_clean_wait); - } - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - } -} - int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; unsigned long flags; int err; - /* Wait for dump & cacheclean to complete */ + /* Wait for dump & cache clean to complete */ wait_event(kbdev->hwcnt.backend.wait, kbdev->hwcnt.backend.triggered != 0); @@ -460,7 +374,7 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) /* Clear the counters */ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_CLEAR, kctx); + GPU_COMMAND_PRFCNT_CLEAR); err = 0; @@ -477,7 +391,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; init_waitqueue_head(&kbdev->hwcnt.backend.wait); - init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, kbasep_cache_clean_worker); kbdev->hwcnt.backend.triggered = 0; @@ -494,4 +407,3 @@ void kbase_instr_backend_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); } - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h index fb55d2d56f2c..c9fb7593a936 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,11 +39,6 @@ enum kbase_instr_state { KBASE_INSTR_STATE_DUMPING, /* We've requested a clean to occur on a workqueue */ KBASE_INSTR_STATE_REQUEST_CLEAN, - /* Hardware is currently cleaning and invalidating caches. */ - KBASE_INSTR_STATE_CLEANING, - /* Cache clean completed, and either a) a dump is complete, or - * b) instrumentation can now be setup. */ - KBASE_INSTR_STATE_CLEANED, /* An error has occured during DUMPING (page fault). */ KBASE_INSTR_STATE_FAULT }; @@ -54,7 +49,6 @@ struct kbase_instr_backend { int triggered; enum kbase_instr_state state; - wait_queue_head_t cache_clean_wait; struct workqueue_struct *cache_clean_wq; struct work_struct cache_clean_work; }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h index 608379e4ca0f..2254b9f30d02 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,12 +35,6 @@ */ void kbasep_cache_clean_worker(struct work_struct *data); -/** - * kbase_clean_caches_done() - Cache clean interrupt received - * @kbdev: Kbase device - */ -void kbase_clean_caches_done(struct kbase_device *kbdev); - /** * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received * @kbdev: Kbase device diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c index 95bebf854637..dd0279a03abc 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -96,7 +96,7 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -134,7 +134,7 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -239,7 +239,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); @@ -251,7 +251,7 @@ static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -271,7 +271,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); @@ -283,7 +283,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -327,9 +327,9 @@ static int kbasep_common_test_interrupt( } /* store old mask */ - old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL); + old_mask_val = kbase_reg_read(kbdev, mask_offset); /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + kbase_reg_write(kbdev, mask_offset, 0x0); if 
(kbdev->irqs[tag].irq) { /* release original handler and install test handler */ @@ -343,8 +343,8 @@ static int kbasep_common_test_interrupt( kbasep_test_interrupt_timeout; /* trigger interrupt */ - kbase_reg_write(kbdev, mask_offset, 0x1, NULL); - kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); + kbase_reg_write(kbdev, mask_offset, 0x1); + kbase_reg_write(kbdev, rawstat_offset, 0x1); hrtimer_start(&kbasep_irq_test_data.timer, HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), @@ -366,7 +366,7 @@ static int kbasep_common_test_interrupt( kbasep_irq_test_data.triggered = 0; /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + kbase_reg_write(kbdev, mask_offset, 0x0); /* release test handler */ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); @@ -382,7 +382,7 @@ static int kbasep_common_test_interrupt( } } /* restore old mask */ - kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); + kbase_reg_write(kbdev, mask_offset, old_mask_val); return err; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index fd56b086d5d0..acd4a5aff94b 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -31,28 +31,72 @@ #include #endif #include -#include #include #include #include +#include #include #include -#include #include #define beenthere(kctx, f, a...) \ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) -#if KBASE_GPU_RESET_EN static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); static void kbasep_reset_timeout_worker(struct work_struct *data); static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); -#endif /* KBASE_GPU_RESET_EN */ static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, struct kbase_context *kctx) { - return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx); + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); +} + +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, + base_jd_core_req core_req, + int js) +{ + u64 affinity; + + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == + BASE_JD_REQ_T) { + /* Tiler-only atom */ + /* If the hardware supports XAFFINITY then we'll only enable + * the tiler (which is the default so this is a no-op), + * otherwise enable shader core 0. + */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + affinity = 1; + else + affinity = 0; + } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + struct mali_base_gpu_coherent_group_info *coherency_info = + &kbdev->gpu_props.props.coherency_info; + + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + + /* JS2 on a dual core group system targets core group 1. All + * other cases target core group 0. 
+ */ + if (js == 2 && num_core_groups > 1) + affinity &= coherency_info->group[1].core_mask; + else + affinity &= coherency_info->group[0].core_mask; + } else { + /* Use all cores */ + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + affinity & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + affinity >> 32); + + return affinity; } void kbase_job_hw_submit(struct kbase_device *kbdev, @@ -62,6 +106,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_context *kctx; u32 cfg; u64 jc_head = katom->jc; + u64 affinity; KBASE_DEBUG_ASSERT(kbdev); KBASE_DEBUG_ASSERT(katom); @@ -70,20 +115,13 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Command register must be available */ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); - /* Affinity is not violating */ - kbase_js_debug_log_current_affinities(kbdev); - KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, - katom->affinity)); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), - jc_head & 0xFFFFFFFF, kctx); + jc_head & 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), - jc_head >> 32, kctx); + jc_head >> 32); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), - katom->affinity & 0xFFFFFFFF, kctx); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), - katom->affinity >> 32, kctx); + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ @@ -101,6 +139,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + cfg |= JS_CONFIG_END_FLUSH_CLEAN; else cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; @@ -127,11 +167,11 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, } } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), - katom->flush_id, kctx); + katom->flush_id); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. @@ -139,11 +179,11 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom->start_timestamp = ktime_get(); /* GO ! 
*/ - dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", - katom, kctx, js, jc_head, katom->affinity); + dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", + katom, kctx, js, jc_head); KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, - (u32) katom->affinity); + (u32)affinity); #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_job_slots_event( @@ -151,7 +191,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kctx, kbase_jd_atom_id(kctx, katom)); #endif KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, - katom->affinity, cfg); + affinity, cfg); KBASE_TLSTREAM_TL_RET_CTX_LPU( kctx, &kbdev->gpu_props.props.raw_props.js_features[ @@ -174,10 +214,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; } #endif - kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_START, katom->kctx); + JS_COMMAND_START); } /** @@ -269,10 +307,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) /* read out the job slot status code if the job * slot reported failure */ completion_code = kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_STATUS), NULL); + JOB_SLOT_REG(i, JS_STATUS)); - switch (completion_code) { - case BASE_JD_EVENT_STOPPED: + if (completion_code == BASE_JD_EVENT_STOPPED) { #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_job_slots_event( GATOR_MAKE_EVENT( @@ -287,38 +324,27 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * JS_TAIL so that the job chain can * be resumed */ job_tail = (u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_LO), - NULL) | + JOB_SLOT_REG(i, JS_TAIL_LO)) | ((u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_HI), - NULL) << 32); - break; - case BASE_JD_EVENT_NOT_STARTED: + JOB_SLOT_REG(i, JS_TAIL_HI)) + << 32); + } else if (completion_code == + BASE_JD_EVENT_NOT_STARTED) { /* PRLAM-10673 can cause a TERMINATED * job to come back as NOT_STARTED, but * the error interrupt helps us detect * it */ completion_code = BASE_JD_EVENT_TERMINATED; - /* fall through */ - default: - meson_gpu_data_invalid_count ++; - dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", - i, completion_code, - kbase_exception_name - (kbdev, - completion_code)); } kbase_gpu_irq_evict(kbdev, i, completion_code); } kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), - done & ((1 << i) | (1 << (i + 16))), - NULL); + done & ((1 << i) | (1 << (i + 16)))); active = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_JS_STATE), - NULL); + JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); if (((active >> i) & 1) == 0 && (((done >> (i + 16)) & 1) == 0)) { @@ -363,7 +389,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) * execution. 
*/ u32 rawstat = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); if ((rawstat >> (i + 16)) & 1) { /* There is a failed job that we've @@ -413,7 +439,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } spurious: done = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { /* Workaround for missing interrupt caused by @@ -421,7 +447,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if (((active >> i) & 1) && (0 == kbase_reg_read(kbdev, JOB_SLOT_REG(i, - JS_STATUS), NULL))) { + JS_STATUS)))) { /* Force job slot to be processed again */ done |= (1u << i); @@ -439,7 +465,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#if KBASE_GPU_RESET_EN if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) { /* If we're trying to reset the GPU then we might be able to do @@ -448,7 +473,6 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) */ kbasep_try_reset_gpu_early(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); } KBASE_EXPORT_TEST_API(kbase_job_done); @@ -485,7 +509,6 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) { - struct kbase_context *kctx = target_katom->kctx; #if KBASE_TRACE_ENABLE u32 status_reg_before; u64 job_in_head_before; @@ -495,12 +518,11 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* Check the head pointer */ job_in_head_before = ((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_LO), NULL)) + JOB_SLOT_REG(js, JS_HEAD_LO))) | (((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) + JOB_SLOT_REG(js, JS_HEAD_HI))) << 32); - status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), - NULL); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); #endif if (action == JS_COMMAND_SOFT_STOP) { @@ -604,11 +626,10 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, } } - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); #if KBASE_TRACE_ENABLE - status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), - NULL); + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); if (status_reg_after == BASE_JD_EVENT_ACTIVE) { struct kbase_jd_atom *head; struct kbase_context *head_kctx; @@ -777,7 +798,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) if (timeout != 0) goto exit; -#if KBASE_GPU_RESET_EN if (kbase_prepare_to_reset_gpu(kbdev)) { dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", @@ -789,12 +809,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) wait_event(kbdev->hwaccess.backend.reset_wait, atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING); -#else - dev_warn(kbdev->dev, - "Jobs failed to be killed (within %d ms) as part of context termination (e.g. 
process exit)\n", - ZAP_TIMEOUT); - -#endif exit: dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); @@ -813,7 +827,7 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); if (kbdev->pm.backend.gpu_powered) flush_id = kbase_reg_read(kbdev, - GPU_CONTROL_REG(LATEST_FLUSH), NULL); + GPU_CONTROL_REG(LATEST_FLUSH)); mutex_unlock(&kbdev->pm.lock); } @@ -822,7 +836,6 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) int kbase_job_slot_init(struct kbase_device *kbdev) { -#if KBASE_GPU_RESET_EN kbdev->hwaccess.backend.reset_workq = alloc_workqueue( "Mali reset workqueue", 0, 1); if (NULL == kbdev->hwaccess.backend.reset_workq) @@ -835,7 +848,6 @@ int kbase_job_slot_init(struct kbase_device *kbdev) HRTIMER_MODE_REL); kbdev->hwaccess.backend.reset_timer.function = kbasep_reset_timer_callback; -#endif return 0; } @@ -848,13 +860,10 @@ void kbase_job_slot_halt(struct kbase_device *kbdev) void kbase_job_slot_term(struct kbase_device *kbdev) { -#if KBASE_GPU_RESET_EN destroy_workqueue(kbdev->hwaccess.backend.reset_workq); -#endif } KBASE_EXPORT_TEST_API(kbase_job_slot_term); -#if KBASE_GPU_RESET_EN /** * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot * @kbdev: kbase device pointer @@ -912,7 +921,6 @@ static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, return ret; } -#endif /* KBASE_GPU_RESET_EN */ /** * kbase_job_slot_softstop_swflags - Soft-stop a job with flags @@ -969,7 +977,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, { struct kbase_device *kbdev = kctx->kbdev; bool stopped; -#if KBASE_GPU_RESET_EN /* We make the check for AFBC before evicting/stopping atoms. Note * that no other thread can modify the slots whilst we have the * hwaccess_lock. 
*/ @@ -977,12 +984,10 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, target_katom); -#endif stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, JS_COMMAND_HARD_STOP); -#if KBASE_GPU_RESET_EN if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || needs_workaround_for_afbc)) { @@ -997,7 +1002,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, kbase_reset_gpu_locked(kbdev); } } -#endif } /** @@ -1062,8 +1066,6 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, } } - -#if KBASE_GPU_RESET_EN static void kbase_debug_dump_registers(struct kbase_device *kbdev) { int i; @@ -1072,34 +1074,32 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) dev_err(kbdev->dev, "Register state:"); dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL)); + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL)); + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); for (i = 0; i < 3; i++) { dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), - NULL), - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO), - NULL)); + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL)); + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL)); + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL)); + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG), NULL)); + kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); } 
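The r16p0 hunks in this file drop the per-context argument from kbase_reg_read()/kbase_reg_write() and replace the vinstr suspend/resume pair in the reset worker with the new hardware counter context API. A minimal sketch of that counter bracketing, using only calls that appear in the hunks below (the helper name and the elided reset step are illustrative and not part of the patch):

	/* Sketch only: disable GPU HW counters for the duration of a reset. */
	static void sketch_reset_hwcnt_bracket(struct kbase_device *kbdev)
	{
		unsigned long flags;

		/* Blocks until the counters are disabled. */
		kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

		/* ... soft-reset and power-up sequence would happen here ... */

		/* Re-enable the counters under the hwaccess lock, as the
		 * reset worker now does.
		 */
		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	}

The actual worker additionally waits for the reset timer to complete and flushes the IRQ handlers before touching the hardware, as the hunk below shows.
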
static void kbasep_reset_timeout_worker(struct work_struct *data) @@ -1108,7 +1108,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) struct kbase_device *kbdev; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; - bool try_schedule = false; bool silent = false; u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; @@ -1126,9 +1125,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); - /* Suspend vinstr. - * This call will block until vinstr is suspended. */ - kbase_vinstr_suspend(kbdev->vinstr_ctx); + /* Disable GPU hardware counters. + * This call will block until counters are disabled. + */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); /* Make sure the timer has completed - this cannot be done from * interrupt context, so this cannot be done within @@ -1143,15 +1143,18 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_RESET_GPU_NOT_PENDING); kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); - kbase_vinstr_resume(kbdev->vinstr_ctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return; } KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - spin_lock(&kbdev->hwaccess_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); spin_lock(&kbdev->mmu_mask_change); + kbase_pm_reset_start_locked(kbdev); + /* We're about to flush out the IRQs and their bottom half's */ kbdev->irq_reset_flush = true; @@ -1160,8 +1163,7 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_disable_interrupts_nolock(kbdev); spin_unlock(&kbdev->mmu_mask_change); - spin_unlock(&kbdev->hwaccess_lock); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Ensure that any IRQ handlers have finished * Must be done without any locks IRQ handlers will take */ @@ -1223,37 +1225,33 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_enable_interrupts(kbdev); - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); - kbase_disjoint_state_down(kbdev); - wake_up(&kbdev->hwaccess.backend.reset_wait); - if (!silent) - dev_err(kbdev->dev, "Reset complete"); - - if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) - try_schedule = true; - mutex_unlock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); + kbase_pm_reset_complete(kbdev); + /* Find out what cores are required now */ kbase_pm_update_cores_state(kbdev); /* Synchronously request and wait for those cores, because if * instrumentation is enabled it would need them immediately. 
*/ - kbase_pm_check_transitions_sync(kbdev); + kbase_pm_wait_for_desired_state(kbdev); mutex_unlock(&kbdev->pm.lock); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + + wake_up(&kbdev->hwaccess.backend.reset_wait); + if (!silent) + dev_err(kbdev->dev, "Reset complete"); + /* Try submitting some jobs to restart processing */ - if (try_schedule) { - KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, - 0); - kbase_js_sched_all(kbdev); - } + KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); + kbase_js_sched_all(kbdev); /* Process any pending slot updates */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1262,8 +1260,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_context_idle(kbdev); - /* Release vinstr */ - kbase_vinstr_resume(kbdev->vinstr_ctx); + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } @@ -1437,20 +1437,22 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) kbasep_try_reset_gpu_early_locked(kbdev); } -void kbase_reset_gpu_silent(struct kbase_device *kbdev) +int kbase_reset_gpu_silent(struct kbase_device *kbdev) { if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_SILENT) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ - return; + return -EAGAIN; } kbase_disjoint_state_up(kbdev); queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work); + + return 0; } bool kbase_reset_gpu_active(struct kbase_device *kbdev) @@ -1461,4 +1463,3 @@ bool kbase_reset_gpu_active(struct kbase_device *kbdev) return true; } -#endif /* KBASE_GPU_RESET_EN */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index d71a9edab94f..452ddee35581 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -159,11 +159,11 @@ void kbase_job_slot_halt(struct kbase_device *kbdev); void kbase_job_slot_term(struct kbase_device *kbdev); /** - * kbase_gpu_cacheclean - Cause a GPU cache clean & flush + * kbase_gpu_cache_clean - Cause a GPU cache clean & flush * @kbdev: Device pointer * * Caller must not be in IRQ context */ -void kbase_gpu_cacheclean(struct kbase_device *kbdev); +void kbase_gpu_cache_clean(struct kbase_device *kbdev); #endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index 7f09fd229748..c714582dfd79 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -30,11 +30,11 @@ #include #include #include +#include #include #include #include #include -#include #include /* Return whether the specified ringbuffer is empty. 
HW access lock must be @@ -104,8 +104,6 @@ static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; - kbase_js_debug_log_current_affinities(kbdev); - return katom; } @@ -122,12 +120,6 @@ struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; } -struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, - int js) -{ - return kbase_gpu_inspect(kbdev, js, 0); -} - struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, int js) { @@ -305,331 +297,14 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, int js) } -static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom) -{ - /* The most recently checked affinity. Having this at this scope allows - * us to guarantee that we've checked the affinity in this function - * call. - */ - u64 recently_chosen_affinity = 0; - bool chosen_affinity = false; - bool retry; - - do { - retry = false; - - /* NOTE: The following uses a number of FALLTHROUGHs to optimize - * the calls to this function. Ending of the function is - * indicated by BREAK OUT */ - switch (katom->coreref_state) { - /* State when job is first attempted to be run */ - case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: - KBASE_DEBUG_ASSERT(katom->affinity == 0); - - /* Compute affinity */ - if (false == kbase_js_choose_affinity( - &recently_chosen_affinity, kbdev, katom, - js)) { - /* No cores are currently available */ - /* *** BREAK OUT: No state transition *** */ - break; - } - - chosen_affinity = true; - - /* Request the cores */ - kbase_pm_request_cores(kbdev, - katom->core_req & BASE_JD_REQ_T, - recently_chosen_affinity); - - katom->affinity = recently_chosen_affinity; - - /* Proceed to next state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: - { - enum kbase_pm_cores_ready cores_ready; - - KBASE_DEBUG_ASSERT(katom->affinity != 0 || - (katom->core_req & BASE_JD_REQ_T)); - - cores_ready = kbase_pm_register_inuse_cores( - kbdev, - katom->core_req & BASE_JD_REQ_T, - katom->affinity); - if (cores_ready == KBASE_NEW_AFFINITY) { - /* Affinity no longer valid - return to - * previous state */ - kbasep_js_job_check_deref_cores(kbdev, - katom); - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_REGISTER_INUSE_FAILED, - katom->kctx, katom, - katom->jc, js, - (u32) katom->affinity); - /* *** BREAK OUT: Return to previous - * state, retry *** */ - retry = true; - break; - } - if (cores_ready == KBASE_CORES_NOT_READY) { - /* Stay in this state and return, to - * retry at this state later */ - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_REGISTER_INUSE_FAILED, - katom->kctx, katom, - katom->jc, js, - (u32) katom->affinity); - /* *** BREAK OUT: No state transition - * *** */ - break; - } - /* Proceed to next state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; - } - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: - KBASE_DEBUG_ASSERT(katom->affinity != 0 || - (katom->core_req & BASE_JD_REQ_T)); - - /* Optimize out choosing the affinity twice in the same - * function call */ - if (chosen_affinity == 
false) { - /* See if the affinity changed since a previous - * call. */ - if (false == kbase_js_choose_affinity( - &recently_chosen_affinity, - kbdev, katom, js)) { - /* No cores are currently available */ - kbasep_js_job_check_deref_cores(kbdev, - katom); - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, - katom->kctx, katom, - katom->jc, js, - (u32) recently_chosen_affinity); - /* *** BREAK OUT: Transition to lower - * state *** */ - break; - } - chosen_affinity = true; - } - - /* Now see if this requires a different set of cores */ - if (recently_chosen_affinity != katom->affinity) { - enum kbase_pm_cores_ready cores_ready; - - kbase_pm_request_cores(kbdev, - katom->core_req & BASE_JD_REQ_T, - recently_chosen_affinity); - - /* Register new cores whilst we still hold the - * old ones, to minimize power transitions */ - cores_ready = - kbase_pm_register_inuse_cores(kbdev, - katom->core_req & BASE_JD_REQ_T, - recently_chosen_affinity); - kbasep_js_job_check_deref_cores(kbdev, katom); - - /* Fixup the state that was reduced by - * deref_cores: */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; - katom->affinity = recently_chosen_affinity; - if (cores_ready == KBASE_NEW_AFFINITY) { - /* Affinity no longer valid - return to - * previous state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; - - kbasep_js_job_check_deref_cores(kbdev, - katom); - - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_REGISTER_INUSE_FAILED, - katom->kctx, katom, - katom->jc, js, - (u32) katom->affinity); - /* *** BREAK OUT: Return to previous - * state, retry *** */ - retry = true; - break; - } - /* Now might be waiting for powerup again, with - * a new affinity */ - if (cores_ready == KBASE_CORES_NOT_READY) { - /* Return to previous state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, - katom->kctx, katom, - katom->jc, js, - (u32) katom->affinity); - /* *** BREAK OUT: Transition to lower - * state *** */ - break; - } - } - /* Proceed to next state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: - KBASE_DEBUG_ASSERT(katom->affinity != 0 || - (katom->core_req & BASE_JD_REQ_T)); - KBASE_DEBUG_ASSERT(katom->affinity == - recently_chosen_affinity); - - /* Note: this is where the caller must've taken the - * hwaccess_lock */ - - /* Check for affinity violations - if there are any, - * then we just ask the caller to requeue and try again - * later */ - if (kbase_js_affinity_would_violate(kbdev, js, - katom->affinity) != false) { - /* Return to previous state */ - katom->coreref_state = - KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; - /* *** BREAK OUT: Transition to lower state *** - */ - KBASE_TRACE_ADD_SLOT_INFO(kbdev, - JS_CORE_REF_AFFINITY_WOULD_VIOLATE, - katom->kctx, katom, katom->jc, js, - (u32) katom->affinity); - break; - } - - /* No affinity violations would result, so the cores are - * ready */ - katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; - /* *** BREAK OUT: Cores Ready *** */ - break; - - default: - KBASE_DEBUG_ASSERT_MSG(false, - "Unhandled kbase_atom_coreref_state %d", - katom->coreref_state); - break; - } - } while (retry != false); - - return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); -} - -static void kbasep_js_job_check_deref_cores(struct 
kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(katom != NULL); - - switch (katom->coreref_state) { - case KBASE_ATOM_COREREF_STATE_READY: - /* State where atom was submitted to the HW - just proceed to - * power-down */ - KBASE_DEBUG_ASSERT(katom->affinity != 0 || - (katom->core_req & BASE_JD_REQ_T)); - - /* *** FALLTHROUGH *** */ - - case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: - /* State where cores were registered */ - KBASE_DEBUG_ASSERT(katom->affinity != 0 || - (katom->core_req & BASE_JD_REQ_T)); - kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, - katom->affinity); - - break; - - case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: - /* State where cores were requested, but not registered */ - KBASE_DEBUG_ASSERT(katom->affinity != 0 || - (katom->core_req & BASE_JD_REQ_T)); - kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, - katom->affinity); - break; - - case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: - /* Initial state - nothing required */ - KBASE_DEBUG_ASSERT(katom->affinity == 0); - break; - - default: - KBASE_DEBUG_ASSERT_MSG(false, - "Unhandled coreref_state: %d", - katom->coreref_state); - break; - } - - katom->affinity = 0; - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; -} - -static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev, - base_jd_core_req core_req, u64 affinity, - enum kbase_atom_coreref_state coreref_state) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - switch (coreref_state) { - case KBASE_ATOM_COREREF_STATE_READY: - /* State where atom was submitted to the HW - just proceed to - * power-down */ - KBASE_DEBUG_ASSERT(affinity != 0 || - (core_req & BASE_JD_REQ_T)); - - /* *** FALLTHROUGH *** */ - - case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: - /* State where cores were registered */ - KBASE_DEBUG_ASSERT(affinity != 0 || - (core_req & BASE_JD_REQ_T)); - kbase_pm_release_cores(kbdev, core_req & BASE_JD_REQ_T, - affinity); - - break; - - case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: - /* State where cores were requested, but not registered */ - KBASE_DEBUG_ASSERT(affinity != 0 || - (core_req & BASE_JD_REQ_T)); - kbase_pm_unrequest_cores(kbdev, core_req & BASE_JD_REQ_T, - affinity); - break; - - case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: - /* Initial state - nothing required */ - KBASE_DEBUG_ASSERT(affinity == 0); - break; - - default: - KBASE_DEBUG_ASSERT_MSG(false, - "Unhandled coreref_state: %d", - coreref_state); - break; - } -} - static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, ktime_t *end_timestamp) { struct kbase_context *kctx = katom->kctx; + lockdep_assert_held(&kbdev->hwaccess_lock); + switch (katom->gpu_rb_state) { case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: /* Should be impossible */ @@ -659,29 +334,57 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: - kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, - katom->affinity); /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_HWCNT)) + kbase_pm_protected_override_disable(kbdev); + if 
(!kbase_jd_katom_is_protected(katom) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) + kbase_pm_protected_override_disable(kbdev); + if (katom->protected_state.enter != KBASE_ATOM_ENTER_PROTECTED_CHECK || katom->protected_state.exit != KBASE_ATOM_EXIT_PROTECTED_CHECK) kbdev->protected_mode_transition = false; - + /* If the atom has suspended hwcnt but has not yet entered + * protected mode, then resume hwcnt now. If the GPU is now in + * protected mode then hwcnt will be resumed by GPU reset so + * don't resume it here. + */ if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) { - kbase_vinstr_resume(kbdev->vinstr_ctx); - - /* Go back to configured model for IPA */ - kbase_ipa_model_use_configured_locked(kbdev); + ((katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } } + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + if (katom->atom_flags & + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, false); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + } /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ @@ -702,6 +405,8 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { + lockdep_assert_held(&kbdev->hwaccess_lock); + kbase_gpu_release_atom(kbdev, katom, NULL); katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; } @@ -769,15 +474,10 @@ static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) return kbdev->protected_mode; } -static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +static void kbase_gpu_disable_coherent(struct kbase_device *kbdev) { - int err = -EINVAL; - lockdep_assert_held(&kbdev->hwaccess_lock); - WARN_ONCE(!kbdev->protected_ops, - "Cannot enter protected mode: protected callbacks not specified.\n"); - /* * When entering into protected mode, we must ensure that the * GPU is not operating in coherent mode as well. 
This is to @@ -785,17 +485,29 @@ static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) */ if (kbdev->system_coherency == COHERENCY_ACE) kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); +} + +static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +{ + int err = -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ONCE(!kbdev->protected_ops, + "Cannot enter protected mode: protected callbacks not specified.\n"); if (kbdev->protected_ops) { /* Switch GPU to protected mode */ err = kbdev->protected_ops->protected_mode_enable( kbdev->protected_dev); - if (err) + if (err) { dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", err); - else + } else { kbdev->protected_mode = true; + kbase_ipa_protection_mode_switch_event(kbdev); + } } return err; @@ -813,9 +525,70 @@ static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) /* The protected mode disable callback will be called as part of reset */ - kbase_reset_gpu_silent(kbdev); + return kbase_reset_gpu_silent(kbdev); +} - return 0; +static int kbase_jm_protected_entry(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + err = kbase_gpu_protected_mode_enter(kbdev); + + /* + * Regardless of result before this call, we are no longer + * transitioning the GPU. + */ + + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); + if (err) { + /* + * Failed to switch into protected mode, resume + * GPU hwcnt and fail atom. + */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* + * Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order. + */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + return -EINVAL; + } + + /* + * Protected mode sanity checks. + */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), + kbase_gpu_in_protected_mode(kbdev)); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + return err; } static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, @@ -823,6 +596,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, { int err = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); + switch (katom[idx]->protected_state.enter) { case KBASE_ATOM_ENTER_PROTECTED_CHECK: KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev); @@ -831,28 +606,41 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, * there are no atoms currently on the GPU. */ WARN_ON(kbdev->protected_mode_transition); WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + /* If hwcnt is disabled, it means we didn't clean up correctly + * during last exit from protected mode. 
+ */ + WARN_ON(kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_transition = true; katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_VINSTR; + KBASE_ATOM_ENTER_PROTECTED_HWCNT; + + kbdev->protected_mode_transition = true; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_ENTER_PROTECTED_VINSTR: - if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { - /* - * We can't switch now because - * the vinstr core state switch - * is not done yet. - */ - return -EAGAIN; + case KBASE_ATOM_ENTER_PROTECTED_HWCNT: + /* See if we can get away with disabling hwcnt atomically */ + kbdev->protected_mode_hwcnt_desired = false; + if (!kbdev->protected_mode_hwcnt_disabled) { + if (kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)) + kbdev->protected_mode_hwcnt_disabled = true; } - /* Use generic model for IPA in protected mode */ - kbase_ipa_model_use_fallback_locked(kbdev); + /* We couldn't disable atomically, so kick off a worker */ + if (!kbdev->protected_mode_hwcnt_disabled) { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#endif + return -EAGAIN; + } /* Once reaching this point GPU must be - * switched to protected mode or vinstr + * switched to protected mode or hwcnt * re-enabled. */ /* @@ -863,6 +651,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, katom[idx]->protected_state.enter = KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + kbase_pm_protected_override_enable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -873,61 +662,84 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { /* - * The L2 is still powered, wait for all the users to - * finish with it before doing the actual reset. - */ + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. + */ return -EAGAIN; } } katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_FINISHED; + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_ENTER_PROTECTED_FINISHED: - - /* No jobs running, so we can switch GPU mode right now. */ - err = kbase_gpu_protected_mode_enter(kbdev); - + case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: /* - * Regardless of result, we are no longer transitioning - * the GPU. + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. */ - kbdev->protected_mode_transition = false; - KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); - if (err) { + kbase_gpu_disable_coherent(kbdev); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { /* - * Failed to switch into protected mode, resume - * vinstr core and fail atom. + * Power on L2 caches; this will also result in the + * correct value written to coherency enable register. */ - kbase_vinstr_resume(kbdev->vinstr_ctx); - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order. 
*/ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } + kbase_pm_protected_l2_override(kbdev, true); - /* Go back to configured model for IPA */ - kbase_ipa_model_use_configured_locked(kbdev); - - return -EINVAL; + /* + * Set the flag on the atom that additional + * L2 references are taken. + */ + katom[idx]->atom_flags |= + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; } - /* Protected mode sanity checks. */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), - kbase_gpu_in_protected_mode(kbdev)); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_FINISHED; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) + return -EAGAIN; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + /* + * Check that L2 caches are powered and, if so, + * enter protected mode. + */ + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { + /* + * Remove additional L2 reference and reset + * the atom flag which denotes it. + */ + if (katom[idx]->atom_flags & + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, + false); + katom[idx]->atom_flags &= + ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + + err = kbase_jm_protected_entry(kbdev, katom, idx, js); + + if (err) + return err; + } else { + /* + * still waiting for L2 caches to power up + */ + return -EAGAIN; + } + } else { + err = kbase_jm_protected_entry(kbdev, katom, idx, js); + + if (err) + return err; + } } return 0; @@ -938,6 +750,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, { int err = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); switch (katom[idx]->protected_state.exit) { case KBASE_ATOM_EXIT_PROTECTED_CHECK: @@ -957,6 +770,7 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; kbdev->protected_mode_transition = true; + kbase_pm_protected_override_enable(kbdev); kbase_pm_update_cores_state_nolock(kbdev); /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ @@ -978,8 +792,12 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, /* Issue the reset to the GPU */ err = kbase_gpu_protected_mode_reset(kbdev); + if (err == -EAGAIN) + return -EAGAIN; + if (err) { kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); /* Failed to exit protected mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; @@ -993,10 +811,16 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, kbase_jm_return_atom_to_js(kbdev, katom[idx]); } - kbase_vinstr_resume(kbdev->vinstr_ctx); - - /* Use generic model for IPA in protected mode */ - kbase_ipa_model_use_fallback_locked(kbdev); + /* If we're exiting from protected mode, hwcnt must have + * been disabled during entry. 
+ */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } return -EINVAL; } @@ -1025,6 +849,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_reset_gpu_active(kbdev)) + return; + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { struct kbase_jd_atom *katom[2]; int idx; @@ -1130,9 +957,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; } - cores_ready = - kbasep_js_job_check_ref_cores(kbdev, js, - katom[idx]); + cores_ready = kbase_pm_cores_requested(kbdev, + true); if (katom[idx]->event_code == BASE_JD_EVENT_PM_EVENT) { @@ -1144,8 +970,6 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) if (!cores_ready) break; - kbase_js_affinity_retain_slot_cores(kbdev, js, - katom[idx]->affinity); katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_AFFINITY; @@ -1260,26 +1084,30 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, if (next_katom && katom->kctx == next_katom->kctx && next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && - HAS_DEP(next_katom) && - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) + (HAS_DEP(next_katom) || next_katom->sched_priority == + katom->sched_priority) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_NOP, NULL); + JS_COMMAND_NOP); next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; if (completion_code == BASE_JD_EVENT_STOPPED) { - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom, &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as - [katom->kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(katom->kctx, + [next_katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as + [next_katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx, &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); + [next_katom->slot_nr]); } + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + return true; } @@ -1317,26 +1145,16 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * flushed. To prevent future evictions causing possible memory * corruption we need to flush the cache manually before any * affected memory gets reused. 
*/ - katom->need_cache_flush_cores_retained = katom->affinity; - kbase_pm_request_cores(kbdev, false, katom->affinity); + katom->need_cache_flush_cores_retained = true; } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { if (kbdev->gpu_props.num_core_groups > 1 && - !(katom->affinity & - kbdev->gpu_props.props.coherency_info.group[0].core_mask - ) && - (katom->affinity & - kbdev->gpu_props.props.coherency_info.group[1].core_mask - )) { + katom->device_nr >= 1) { dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); - katom->need_cache_flush_cores_retained = - katom->affinity; - kbase_pm_request_cores(kbdev, false, - katom->affinity); + katom->need_cache_flush_cores_retained = true; } } katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); - kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); if (completion_code == BASE_JD_EVENT_STOPPED) { struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, @@ -1351,6 +1169,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (next_katom && katom->kctx == next_katom->kctx && next_katom->sched_priority == katom->sched_priority) { + WARN_ON(next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED); kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); kbase_jm_return_atom_to_js(kbdev, next_katom); } @@ -1358,6 +1178,13 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbasep_js_device_data *js_devdata = &kbdev->js_data; int i; + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", + js, completion_code, + kbase_exception_name + (kbdev, + completion_code)); + #if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 KBASE_TRACE_DUMP(kbdev); #endif @@ -1431,10 +1258,6 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) katom->event_code = (base_jd_event_code)completion_code; - kbase_device_trace_register_access(kctx, REG_WRITE, - JOB_CONTROL_REG(JOB_IRQ_CLEAR), - 1 << js); - /* Complete the job, and start new ones * * Also defer remaining work onto the workqueue: @@ -1519,10 +1342,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) break; if (katom->protected_state.exit == KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { - KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); - - kbase_vinstr_resume(kbdev->vinstr_ctx); - /* protected mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), @@ -1533,7 +1352,8 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) !kbase_jd_katom_is_protected(katom), "Protected atom on JS%d not supported", js); } - if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) + if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && + !kbase_ctx_flag(katom->kctx, KCTX_DYING)) keep_in_jm_rb = true; kbase_gpu_release_atom(kbdev, katom, NULL); @@ -1544,9 +1364,6 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) * it will be processed again from the starting state. 
*/ if (keep_in_jm_rb) { - kbasep_js_job_check_deref_cores(kbdev, katom); - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->affinity = 0; katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* As the atom was not removed, increment the * index so that we read the correct atom in the @@ -1565,7 +1382,19 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) } } + /* Re-enable GPU hardware counters if we're resetting from protected + * mode. + */ + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); + } + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); } static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, @@ -1586,6 +1415,8 @@ static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, u32 action, bool disjoint) { + lockdep_assert_held(&kbdev->hwaccess_lock); + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; kbase_gpu_mark_atom_for_return(kbdev, katom); katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; @@ -1609,12 +1440,6 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } -static void kbase_job_evicted(struct kbase_jd_atom *katom) -{ - kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, - katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); -} - bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, int js, @@ -1690,7 +1515,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, /* katom_idx0 and katom_idx1 are on GPU */ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT), NULL) == 0) { + JS_COMMAND_NEXT)) == 0) { /* idx0 has already completed - stop * idx1 if needed*/ if (katom_idx1_valid) { @@ -1705,19 +1530,18 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_NOP, NULL); + JS_COMMAND_NOP); if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO), NULL) + JS_HEAD_NEXT_LO)) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI), NULL) + JS_HEAD_NEXT_HI)) != 0) { /* idx1 removed successfully, * will be handled in IRQ */ - kbase_job_evicted(katom_idx1); kbase_gpu_remove_atom(kbdev, katom_idx1, action, true); @@ -1771,7 +1595,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, } else { /* idx1 is on GPU */ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT), NULL) == 0) { + JS_COMMAND_NEXT)) == 0) { /* idx0 has already completed - stop idx1 */ kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); @@ -1781,15 +1605,14 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, * remove */ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_NOP, NULL); + JS_COMMAND_NOP); if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO), NULL) != 0 || + JS_HEAD_NEXT_LO)) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI), NULL) != 0) { + JS_HEAD_NEXT_HI)) != 0) { /* idx1 removed successfully, will be * handled in IRQ once idx0 completes */ - kbase_job_evicted(katom_idx1); kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); @@ -1817,51 +1640,14 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, return ret; } -void kbase_gpu_cacheclean(struct kbase_device 
*kbdev) -{ - /* Limit the number of loops to avoid a hang if the interrupt is missed - */ - u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - - mutex_lock(&kbdev->cacheclean_lock); - - /* use GPU_COMMAND completion solution */ - /* clean & invalidate the caches */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES, NULL); - - /* wait for cache flush to complete before continuing */ - while (--max_loops && - (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & - CLEAN_CACHES_COMPLETED) == 0) - ; - - /* clear the CLEAN_CACHES_COMPLETED irq */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, - CLEAN_CACHES_COMPLETED); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), - CLEAN_CACHES_COMPLETED, NULL); - KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING, - "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang."); - - mutex_unlock(&kbdev->cacheclean_lock); -} - -void kbase_backend_cacheclean(struct kbase_device *kbdev, +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { if (katom->need_cache_flush_cores_retained) { - unsigned long flags; - - kbase_gpu_cacheclean(kbdev); + kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_wait_cache_clean(kbdev); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_unrequest_cores(kbdev, false, - katom->need_cache_flush_cores_retained); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - katom->need_cache_flush_cores_retained = 0; + katom->need_cache_flush_cores_retained = false; } } @@ -1872,7 +1658,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, * If cache flush required due to HW workaround then perform the flush * now */ - kbase_backend_cacheclean(kbdev, katom); + kbase_backend_cache_clean(kbdev, katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && (katom->core_req & BASE_JD_REQ_FS) && @@ -1891,26 +1677,11 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; } } - - /* Clear the coreref_state now - while check_deref_cores() may not have - * been called yet, the caller will have taken a copy of this field. If - * this is not done, then if the atom is re-scheduled (following a soft - * stop) then the core reference would not be retaken. 
*/ - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->affinity = 0; } void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req, u64 affinity, - enum kbase_atom_coreref_state coreref_state) + base_jd_core_req core_req) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, affinity, - coreref_state); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (!kbdev->pm.active_count) { mutex_lock(&kbdev->js_data.runpool_mutex); mutex_lock(&kbdev->pm.lock); @@ -1949,6 +1720,3 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } - - - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c deleted file mode 100755 index c937eca8c166..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel affinity manager APIs - */ - -#include -#include "mali_kbase_js_affinity.h" -#include "mali_kbase_hw.h" - -#include - - -bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, - int js) -{ - /* - * Here are the reasons for using job slot 2: - * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose) - * - In absence of the above, then: - * - Atoms with BASE_JD_REQ_COHERENT_GROUP - * - But, only when there aren't contexts with - * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on - * all cores on slot 1 could be blocked by those using a coherent group - * on slot 2 - * - And, only when you actually have 2 or more coregroups - if you - * only have 1 coregroup, then having jobs for slot 2 implies they'd - * also be for slot 1, meaning you'll get interference from them. 
Jobs - * able to run on slot 2 could also block jobs that can only run on - * slot 1 (tiler jobs) - */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) - return true; - - if (js != 2) - return true; - - /* Only deal with js==2 now: */ - if (kbdev->gpu_props.num_core_groups > 1) { - /* Only use slot 2 in the 2+ coregroup case */ - if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, - KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) == - false) { - /* ...But only when we *don't* have atoms that run on - * all cores */ - - /* No specific check for BASE_JD_REQ_COHERENT_GROUP - * atoms - the policy will sort that out */ - return true; - } - } - - /* Above checks failed mean we shouldn't use slot 2 */ - return false; -} - -/* - * As long as it has been decided to have a deeper modification of - * what job scheduler, power manager and affinity manager will - * implement, this function is just an intermediate step that - * assumes: - * - all working cores will be powered on when this is called. - * - largest current configuration is 2 core groups. - * - It has been decided not to have hardcoded values so the low - * and high cores in a core split will be evently distributed. - * - Odd combinations of core requirements have been filtered out - * and do not get to this function (e.g. CS+T+NSS is not - * supported here). - * - This function is frequently called and can be optimized, - * (see notes in loops), but as the functionallity will likely - * be modified, optimization has not been addressed. -*/ -bool kbase_js_choose_affinity(u64 * const affinity, - struct kbase_device *kbdev, - struct kbase_jd_atom *katom, int js) -{ - base_jd_core_req core_req = katom->core_req; - unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; - u64 core_availability_mask; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); - - /* - * If no cores are currently available (core availability policy is - * transitioning) then fail. - */ - if (0 == core_availability_mask) { - *affinity = 0; - return false; - } - - KBASE_DEBUG_ASSERT(js >= 0); - - if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == - BASE_JD_REQ_T) { - /* If the hardware supports XAFFINITY then we'll only enable - * the tiler (which is the default so this is a no-op), - * otherwise enable shader core 0. 
*/ - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) - *affinity = 1; - else - *affinity = 0; - - return true; - } - - if (1 == kbdev->gpu_props.num_cores) { - /* trivial case only one core, nothing to do */ - *affinity = core_availability_mask & - kbdev->pm.debug_core_mask[js]; - } else { - if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { - if (js == 0 || num_core_groups == 1) { - /* js[0] and single-core-group systems just get - * the first core group */ - *affinity = - kbdev->gpu_props.props.coherency_info.group[0].core_mask - & core_availability_mask & - kbdev->pm.debug_core_mask[js]; - } else { - /* js[1], js[2] use core groups 0, 1 for - * dual-core-group systems */ - u32 core_group_idx = ((u32) js) - 1; - - KBASE_DEBUG_ASSERT(core_group_idx < - num_core_groups); - *affinity = - kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask - & core_availability_mask & - kbdev->pm.debug_core_mask[js]; - - /* If the job is specifically targeting core - * group 1 and the core availability policy is - * keeping that core group off, then fail */ - if (*affinity == 0 && core_group_idx == 1 && - kbdev->pm.backend.cg1_disabled - == true) - katom->event_code = - BASE_JD_EVENT_PM_EVENT; - } - } else { - /* All cores are available when no core split is - * required */ - *affinity = core_availability_mask & - kbdev->pm.debug_core_mask[js]; - } - } - - /* - * If no cores are currently available in the desired core group(s) - * (core availability policy is transitioning) then fail. - */ - if (*affinity == 0) - return false; - - /* Enable core 0 if tiler required for hardware without XAFFINITY - * support (notes above) */ - if (core_req & BASE_JD_REQ_T) { - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) - *affinity = *affinity | 1; - } - - return true; -} - -static inline bool kbase_js_affinity_is_violating( - struct kbase_device *kbdev, - u64 *affinities) -{ - /* This implementation checks whether the two slots involved in Generic - * thread creation have intersecting affinity. This is due to micro- - * architectural issues where a job in slot A targetting cores used by - * slot B could prevent the job in slot B from making progress until the - * job in slot A has completed. 
- */ - u64 affinity_set_left; - u64 affinity_set_right; - u64 intersection; - - KBASE_DEBUG_ASSERT(affinities != NULL); - - affinity_set_left = affinities[1]; - - affinity_set_right = affinities[2]; - - /* A violation occurs when any bit in the left_set is also in the - * right_set */ - intersection = affinity_set_left & affinity_set_right; - - return (bool) (intersection != (u64) 0u); -} - -bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, - u64 affinity) -{ - struct kbasep_js_device_data *js_devdata; - u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); - js_devdata = &kbdev->js_data; - - memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, - sizeof(js_devdata->runpool_irq.slot_affinities)); - - new_affinities[js] |= affinity; - - return kbase_js_affinity_is_violating(kbdev, new_affinities); -} - -void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, - u64 affinity) -{ - struct kbasep_js_device_data *js_devdata; - u64 cores; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); - js_devdata = &kbdev->js_data; - - KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) - == false); - - cores = affinity; - while (cores) { - int bitnum = fls64(cores) - 1; - u64 bit = 1ULL << bitnum; - s8 cnt; - - cnt = - ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); - - if (cnt == 1) - js_devdata->runpool_irq.slot_affinities[js] |= bit; - - cores &= ~bit; - } -} - -void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, - u64 affinity) -{ - struct kbasep_js_device_data *js_devdata; - u64 cores; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); - js_devdata = &kbdev->js_data; - - cores = affinity; - while (cores) { - int bitnum = fls64(cores) - 1; - u64 bit = 1ULL << bitnum; - s8 cnt; - - KBASE_DEBUG_ASSERT( - js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); - - cnt = - --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); - - if (0 == cnt) - js_devdata->runpool_irq.slot_affinities[js] &= ~bit; - - cores &= ~bit; - } -} - -#if KBASE_TRACE_ENABLE -void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata; - int slot_nr; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - js_devdata = &kbdev->js_data; - - for (slot_nr = 0; slot_nr < 3; ++slot_nr) - KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, - NULL, 0u, slot_nr, - (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); -} -#endif /* KBASE_TRACE_ENABLE */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h deleted file mode 100755 index dbabd94564c7..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Affinity Manager internal APIs. - */ - -#ifndef _KBASE_JS_AFFINITY_H_ -#define _KBASE_JS_AFFINITY_H_ - -/** - * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to - * submit a job to a particular job slot in the current status - * - * @kbdev: The kbase device structure of the device - * @js: Job slot number to check for allowance - * - * Will check if submitting to the given job slot is allowed in the current - * status. For example using job slot 2 while in soft-stoppable state and only - * having 1 coregroup is not allowed by the policy. This function should be - * called prior to submitting a job to a slot to make sure policy rules are not - * violated. - * - * The following locking conditions are made on the caller - * - it must hold hwaccess_lock - */ -bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js); - -/** - * kbase_js_choose_affinity - Compute affinity for a given job. - * - * @affinity: Affinity bitmap computed - * @kbdev: The kbase device structure of the device - * @katom: Job chain of which affinity is going to be found - * @js: Slot the job chain is being submitted - * - * Currently assumes an all-on/all-off power management policy. - * Also assumes there is at least one core with tiler available. - * - * Returns true if a valid affinity was chosen, false if - * no cores were available. - */ -bool kbase_js_choose_affinity(u64 * const affinity, - struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - int js); - -/** - * kbase_js_affinity_would_violate - Determine whether a proposed affinity on - * job slot @js would cause a violation of affinity restrictions. - * - * @kbdev: Kbase device structure - * @js: The job slot to test - * @affinity: The affinity mask to test - * - * The following locks must be held by the caller - * - hwaccess_lock - * - * Return: true if the affinity would violate the restrictions - */ -bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, - u64 affinity); - -/** - * kbase_js_affinity_retain_slot_cores - Affinity tracking: retain cores used by - * a slot - * - * @kbdev: Kbase device structure - * @js: The job slot retaining the cores - * @affinity: The cores to retain - * - * The following locks must be held by the caller - * - hwaccess_lock - */ -void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, - u64 affinity); - -/** - * kbase_js_affinity_release_slot_cores - Affinity tracking: release cores used - * by a slot - * - * @kbdev: Kbase device structure - * @js: Job slot - * @affinity: Bit mask of core to be released - * - * Cores must be released as soon as a job is dequeued from a slot's 'submit - * slots', and before another job is submitted to those slots. Otherwise, the - * refcount could exceed the maximum number submittable to a slot, - * %BASE_JM_SUBMIT_SLOTS. 
- * - * The following locks must be held by the caller - * - hwaccess_lock - */ -void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, - u64 affinity); - -/** - * kbase_js_debug_log_current_affinities - log the current affinities - * - * @kbdev: Kbase device structure - * - * Output to the Trace log the current tracked affinities on all slots - */ -#if KBASE_TRACE_ENABLE -void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev); -#else /* KBASE_TRACE_ENABLE */ -static inline void -kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) -{ -} -#endif /* KBASE_TRACE_ENABLE */ - -#endif /* _KBASE_JS_AFFINITY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index df2dd5ec0526..7307be403d44 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -116,7 +116,7 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { u32 ticks = atom->ticks++; -#ifndef CONFIG_MALI_JOB_DUMP +#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) u32 soft_stop_ticks, hard_stop_ticks, gpu_reset_ticks; if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { @@ -250,14 +250,12 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) } } } -#if KBASE_GPU_RESET_EN if (reset_needed) { dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve."); if (kbase_prepare_to_reset_gpu_locked(kbdev)) kbase_reset_gpu_locked(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ /* the timer is re-issued if there is contexts in the run-pool */ if (backend->timer_running) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 9cd29828016a..ba5bf721e523 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,15 +66,15 @@ static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, } static int wait_ready(struct kbase_device *kbdev, - unsigned int as_nr, struct kbase_context *kctx) + unsigned int as_nr) { unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); /* Wait for the MMU status to indicate there is no active command, in * case one is pending. Do not log remaining register accesses. 
*/ while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) - val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), NULL); + val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); if (max_loops == 0) { dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n"); @@ -83,27 +83,24 @@ static int wait_ready(struct kbase_device *kbdev, /* If waiting in loop was performed, log last read value. */ if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) - kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS), kctx); + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); return 0; } -static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, - struct kbase_context *kctx) +static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) { int status; /* write AS_COMMAND when MMU is ready to accept another command */ - status = wait_ready(kbdev, as_nr, kctx); + status = wait_ready(kbdev, as_nr); if (status == 0) - kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, - kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); return status; } -static void validate_protected_page_fault(struct kbase_device *kbdev, - struct kbase_context *kctx) +static void validate_protected_page_fault(struct kbase_device *kbdev) { /* GPUs which support (native) protected mode shall not report page * fault addresses unless it has protected debug mode and protected @@ -115,8 +112,7 @@ static void validate_protected_page_fault(struct kbase_device *kbdev, if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { protected_debug_mode = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS), - kctx) & GPU_DBGEN; + GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; } if (!protected_debug_mode) { @@ -145,15 +141,16 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (bf_bits | pf_bits) { struct kbase_as *as; int as_no; struct kbase_context *kctx; + struct kbase_fault *fault; /* * the while logic ensures we have a bit set, no need to check @@ -162,6 +159,16 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) as_no = ffs(bf_bits | pf_bits) - 1; as = &kbdev->as[as_no]; + /* find the fault type */ + as->fault_type = (bf_bits & (1 << as_no)) ? 
+ KBASE_MMU_FAULT_TYPE_BUS : + KBASE_MMU_FAULT_TYPE_PAGE; + + if (kbase_as_has_bus_fault(as)) + fault = &as->bf_data; + else + fault = &as->pf_data; + /* * Refcount the kctx ASAP - it shouldn't disappear anyway, since * Bus/Page faults _should_ only occur whilst jobs are running, @@ -170,48 +177,34 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) */ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); - /* find faulting address */ - as->fault_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI), - kctx); - as->fault_addr <<= 32; - as->fault_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO), - kctx); + fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI)); + fault->addr <<= 32; + fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO)); /* Mark the fault protected or not */ - as->protected_mode = kbdev->protected_mode; + fault->protected_mode = kbdev->protected_mode; - if (kbdev->protected_mode && as->fault_addr) { + if (kbdev->protected_mode && fault->addr) { /* check if address reporting is allowed */ - validate_protected_page_fault(kbdev, kctx); + validate_protected_page_fault(kbdev); } /* report the fault to debugfs */ kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - as->fault_status = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, - AS_FAULTSTATUS), - kctx); - - /* find the fault type */ - as->fault_type = (bf_bits & (1 << as_no)) ? - KBASE_MMU_FAULT_TYPE_BUS : - KBASE_MMU_FAULT_TYPE_PAGE; + fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTSTATUS)); if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - as->fault_extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), - kctx); - as->fault_extra_addr <<= 32; - as->fault_extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), - kctx); + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); } if (kbase_as_has_bus_fault(as)) { @@ -234,32 +227,35 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* Process the interrupt for this address space */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_interrupt_process(kbdev, kctx, as); + kbase_mmu_interrupt_process(kbdev, kctx, as, fault); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } -void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx) +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) { struct kbase_mmu_setup *current_setup = &as->current_setup; - u32 transcfg = 0; + u64 transcfg = 0; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + transcfg = current_setup->transcfg; /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ /* Clear PTW_MEMATTR bits */ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; /* Enable correct PTW_MEMATTR bits */ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables 
reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; if (kbdev->system_coherency == COHERENCY_ACE) { /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ @@ -270,35 +266,34 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, } kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg, kctx); + transcfg); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, - kctx); + (transcfg >> 32) & 0xFFFFFFFFUL); } else { if (kbdev->system_coherency == COHERENCY_ACE) current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; } kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), - current_setup->transtab & 0xFFFFFFFFUL, kctx); + current_setup->transtab & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), - (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx); + (current_setup->transtab >> 32) & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), - current_setup->memattr & 0xFFFFFFFFUL, kctx); + current_setup->memattr & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), - (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); + (current_setup->memattr >> 32) & 0xFFFFFFFFUL); KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, current_setup->transtab, current_setup->memattr, transcfg); - write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); } int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op, + u64 vpfn, u32 nr, u32 op, unsigned int handling_irq) { int ret; @@ -307,22 +302,22 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, if (op == AS_COMMAND_UNLOCK) { /* Unlock doesn't require a lock first */ - ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); } else { u64 lock_addr = lock_region(kbdev, vpfn, nr); /* Lock the region that needs to be updated */ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), - lock_addr & 0xFFFFFFFFUL, kctx); + lock_addr & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), - (lock_addr >> 32) & 0xFFFFFFFFUL, kctx); - write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx); + (lock_addr >> 32) & 0xFFFFFFFFUL); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK); /* Run the MMU operation */ - write_cmd(kbdev, as->number, op, kctx); + write_cmd(kbdev, as->number, op); /* Wait for the flush to complete */ - ret = wait_ready(kbdev, as->number, kctx); + ret = wait_ready(kbdev, as->number); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { /* Issue an UNLOCK command to ensure that valid page @@ -339,8 +334,8 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, commands in order to flush the MMU/uTLB, see PRLAM-8812. 
*/ - write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); - write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); } } @@ -348,7 +343,7 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, } void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx, enum kbase_mmu_fault_type type) + enum kbase_mmu_fault_type type) { unsigned long flags; u32 pf_bf_mask; @@ -368,14 +363,14 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) pf_bf_mask |= MMU_BUS_ERROR(as->number); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx, enum kbase_mmu_fault_type type) + enum kbase_mmu_fault_type type) { unsigned long flags; u32 irq_mask; @@ -391,14 +386,14 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, if (kbdev->irq_reset_flush) goto unlock; - irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | MMU_PAGE_FAULT(as->number); if (type == KBASE_MMU_FAULT_TYPE_BUS || type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) irq_mask |= MMU_BUS_ERROR(as->number); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c index 2ed7dfdde6cc..51a10a231df0 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,9 +29,9 @@ #include #include -static u64 always_on_get_core_mask(struct kbase_device *kbdev) +static bool always_on_shaders_needed(struct kbase_device *kbdev) { - return kbdev->gpu_props.props.raw_props.shader_present; + return true; } static bool always_on_get_core_active(struct kbase_device *kbdev) @@ -59,7 +59,7 @@ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { "always_on", /* name */ always_on_init, /* init */ always_on_term, /* term */ - always_on_get_core_mask, /* get_core_mask */ + always_on_shaders_needed, /* shaders_needed */ always_on_get_core_active, /* get_core_active */ 0u, /* flags */ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h index d61d0d0e3640..e7927cf82e5a 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h @@ -1,7 +1,6 @@ - /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,13 +36,13 @@ * * - When KBase indicates that the GPU will be powered up, but we don't yet * know which Job Chains are to be run: - * All Shader Cores are powered up, regardless of whether or not they will - * be needed later. + * Shader Cores are powered up, regardless of whether or not they will be + * needed later. * - * - When KBase indicates that a set of Shader Cores are needed to submit the - * currently queued Job Chains: - * All Shader Cores are kept powered, regardless of whether or not they will - * be needed + * - When KBase indicates that Shader Cores are needed to submit the currently + * queued Job Chains: + * Shader Cores are kept powered, regardless of whether or not they will be + * needed * * - When KBase indicates that the GPU need not be powered: * The Shader Cores are kept powered, regardless of whether or not they will diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 6069c0f7f1c4..c19a0d134696 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -31,11 +31,13 @@ #include #include +#include #include #include #include static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); int kbase_pm_runtime_init(struct kbase_device *kbdev) { @@ -112,7 +114,7 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev) kbdev->pm.backend.gpu_powered = false; } -int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev) { int ret = 0; @@ -128,12 +130,12 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, kbase_pm_gpu_poweroff_wait_wq); + kbdev->pm.backend.ca_cores_enabled = ~0ull; kbdev->pm.backend.gpu_powered = false; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_DEBUG kbdev->pm.backend.driver_ready_for_irqs = false; #endif /* CONFIG_MALI_DEBUG */ - kbdev->pm.backend.gpu_in_desired_state = true; init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); /* Initialise the metrics subsystem */ @@ -141,9 +143,6 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) if (ret) return ret; - init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); - kbdev->pm.backend.l2_powered = 0; - init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); kbdev->pm.backend.reset_done = false; @@ -161,8 +160,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) if (kbase_pm_policy_init(kbdev) != 0) goto pm_policy_fail; + if (kbase_pm_state_machine_init(kbdev) != 0) + goto pm_state_machine_fail; + return 0; +pm_state_machine_fail: + kbase_pm_policy_term(kbdev); pm_policy_fail: kbase_pm_ca_term(kbdev); workq_fail: @@ -170,6 +174,19 @@ workq_fail: return -EINVAL; } +int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.hwcnt_desired = false; + kbdev->pm.backend.hwcnt_disabled = true; + INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, + kbase_pm_hwcnt_disable_worker); + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + return 0; +} + void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) { lockdep_assert_held(&kbdev->pm.lock); @@ -178,12 +195,19 @@ void 
kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) * kbase_pm_clock_off() */ kbase_pm_clock_on(kbdev, is_resume); + if (!is_resume) { + unsigned long flags; + + /* Force update of L2 state - if we have abandoned a power off + * then this may be required to power the L2 back on. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + /* Update core status as required by the policy */ - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START); kbase_pm_update_cores_state(kbdev); - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END); /* NOTE: We don't wait to reach the desired state, since running atoms * will wait for that state to be reached anyway */ @@ -198,44 +222,24 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; -#if !PLATFORM_POWER_DOWN_ONLY - /* Wait for power transitions to complete. We do this with no locks held - * so that we don't deadlock with any pending workqueues */ - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); - kbase_pm_check_transitions_sync(kbdev); - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); -#endif /* !PLATFORM_POWER_DOWN_ONLY */ + if (!platform_power_down_only) + /* Wait for power transitions to complete. We do this with no locks held + * so that we don't deadlock with any pending workqueues. + */ + kbase_pm_wait_for_desired_state(kbdev); mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); -#if PLATFORM_POWER_DOWN_ONLY - if (kbdev->pm.backend.gpu_powered) { - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) { - /* If L2 cache is powered then we must flush it before - * we power off the GPU. Normally this would have been - * handled when the L2 was powered off. 
*/ - kbase_gpu_cacheclean(kbdev); - } - } -#endif /* PLATFORM_POWER_DOWN_ONLY */ - if (!backend->poweron_required) { -#if !PLATFORM_POWER_DOWN_ONLY - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - WARN_ON(kbdev->l2_available_bitmap || - kbdev->shader_available_bitmap || - kbdev->tiler_available_bitmap); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -#endif /* !PLATFORM_POWER_DOWN_ONLY */ + if (!platform_power_down_only) { + unsigned long flags; - /* Consume any change-state events */ - kbase_timeline_pm_check_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF || + backend->l2_state != KBASE_L2_OFF); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } /* Disable interrupts and turn the clock off */ if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { @@ -268,6 +272,8 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) backend->poweroff_wait_in_progress = false; if (backend->poweron_required) { backend->poweron_required = false; + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); kbase_pm_update_cores_state_nolock(kbdev); kbase_backend_slot_update(kbdev); } @@ -279,6 +285,45 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) wake_up(&kbdev->pm.backend.poweroff_wait); } +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.hwcnt_disable_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + + if (do_disable) { + /* PM state did not change while we were doing the disable, + * so commit the work we just performed and continue the state + * machine. + */ + backend->hwcnt_disabled = true; + kbase_pm_update_state(kbdev); + } else { + /* PM state was updated while we were doing the disable, + * so we need to undo the disable we just performed. 
+ */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) { unsigned long flags; @@ -286,29 +331,36 @@ void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) lockdep_assert_held(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (!kbdev->pm.backend.poweroff_wait_in_progress) { - /* Force all cores off */ - kbdev->pm.backend.desired_shader_state = 0; - kbdev->pm.backend.desired_tiler_state = 0; - - /* Force all cores to be unavailable, in the situation where - * transitions are in progress for some cores but not others, - * and kbase_pm_check_transitions_nolock can not immediately - * power off the cores */ - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_available_bitmap = 0; - - kbdev->pm.backend.poweroff_wait_in_progress = true; - kbdev->pm.backend.poweroff_is_suspend = is_suspend; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /*Kick off wq here. Callers will have to wait*/ - queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, - &kbdev->pm.backend.gpu_poweroff_wait_work); + spin_lock(&kbdev->pm.backend.gpu_powered_lock); + if (!kbdev->pm.backend.gpu_powered) { + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + goto unlock_hwaccess; } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); } + + if (kbdev->pm.backend.poweroff_wait_in_progress) + goto unlock_hwaccess; + + /* Force all cores off */ + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + + kbdev->pm.backend.poweroff_wait_in_progress = true; + kbdev->pm.backend.poweroff_is_suspend = is_suspend; + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; + + /* l2_desired being false should cause the state machine to + * start powering off the L2. When it actually is powered off, + * the interrupt handler will call kbase_pm_l2_update_state() + * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. + * Callers of this function will need to wait on poweroff_wait. 
+ */ + kbase_pm_update_state(kbdev); + +unlock_hwaccess: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } static bool is_poweroff_in_progress(struct kbase_device *kbdev) @@ -353,8 +405,6 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, return ret; } - kbasep_pm_init_core_use_bitmaps(kbdev); - kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = kbdev->pm.debug_core_mask[1] = kbdev->pm.debug_core_mask[2] = @@ -397,20 +447,20 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev != NULL); mutex_lock(&kbdev->pm.lock); - kbase_pm_cancel_deferred_poweroff(kbdev); kbase_pm_do_poweroff(kbdev, false); mutex_unlock(&kbdev->pm.lock); } KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); -void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); /* Free any resources the policy allocated */ + kbase_pm_state_machine_term(kbdev); kbase_pm_policy_term(kbdev); kbase_pm_ca_term(kbdev); @@ -420,26 +470,30 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); } +void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); + + if (kbdev->pm.backend.hwcnt_disabled) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +} + void kbase_pm_power_changed(struct kbase_device *kbdev) { - bool cores_are_available; unsigned long flags; - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); + kbase_pm_update_state(kbdev); - if (cores_are_available) { - /* Log timelining information that a change in state has - * completed */ - kbase_timeline_pm_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + kbase_backend_slot_update(kbdev); - kbase_backend_slot_update(kbdev); - } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -476,7 +530,6 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); - kbase_pm_cancel_deferred_poweroff(kbdev); kbase_pm_do_poweroff(kbdev, true); kbase_backend_timer_suspend(kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index 5b369fb1b515..2cb9452d7f60 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,160 +28,80 @@ #include #include -static const struct kbase_pm_ca_policy *const policy_list[] = { - &kbase_pm_ca_fixed_policy_ops, -#ifdef CONFIG_MALI_DEVFREQ - &kbase_pm_ca_devfreq_policy_ops, -#endif -#if !MALI_CUSTOMER_RELEASE - &kbase_pm_ca_random_policy_ops -#endif -}; - -/** - * POLICY_COUNT - The number of policies available in the system. - * - * This is derived from the number of functions listed in policy_list. - */ -#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) - int kbase_pm_ca_init(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev != NULL); - - kbdev->pm.backend.ca_current_policy = policy_list[0]; +#ifdef CONFIG_MALI_DEVFREQ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; - kbdev->pm.backend.ca_current_policy->init(kbdev); + if (kbdev->current_core_mask) + pm_backend->ca_cores_enabled = kbdev->current_core_mask; + else + pm_backend->ca_cores_enabled = + kbdev->gpu_props.props.raw_props.shader_present; +#endif return 0; } void kbase_pm_ca_term(struct kbase_device *kbdev) { - kbdev->pm.backend.ca_current_policy->term(kbdev); -} - -int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list) -{ - if (!list) - return POLICY_COUNT; - - *list = policy_list; - - return POLICY_COUNT; -} - -KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies); - -const struct kbase_pm_ca_policy -*kbase_pm_ca_get_policy(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - return kbdev->pm.backend.ca_current_policy; } -KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy); - -void kbase_pm_ca_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_ca_policy *new_policy) +#ifdef CONFIG_MALI_DEVFREQ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) { - const struct kbase_pm_ca_policy *old_policy; + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; unsigned long flags; - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(new_policy != NULL); - - KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, - new_policy->id); - - /* During a policy change we pretend the GPU is active */ - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread */ - kbase_pm_context_active(kbdev); - - mutex_lock(&kbdev->pm.lock); - - /* Remove the policy to prevent IRQ handlers from working on it */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - old_policy = kbdev->pm.backend.ca_current_policy; - kbdev->pm.backend.ca_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (old_policy->term) - old_policy->term(kbdev); - if (new_policy->init) - new_policy->init(kbdev); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.ca_current_policy = new_policy; + if (!(core_mask & kbdev->pm.debug_core_mask_all)) { + dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); + goto unlock; + } - /* If any core power state changes were previously attempted, but - * couldn't be made because the policy was changing (current_policy was - * NULL), then re-try them here. 
*/ - kbase_pm_update_cores_state_nolock(kbdev); + pm_backend->ca_cores_enabled = core_mask; - kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, - kbdev->shader_ready_bitmap, - kbdev->shader_transitioning_bitmap); + kbase_pm_update_state(kbdev); +unlock: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->pm.lock); - - /* Now the policy change is finished, we release our fake context active - * reference */ - kbase_pm_context_idle(kbdev); + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", + pm_backend->ca_cores_enabled); } - -KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); +#endif u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) { + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; + lockdep_assert_held(&kbdev->hwaccess_lock); /* All cores must be enabled when instrumentation is in use */ - if (kbdev->pm.backend.instr_enabled) - return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask_all; - - if (kbdev->pm.backend.ca_current_policy == NULL) + if (pm_backend->instr_enabled) return kbdev->gpu_props.props.raw_props.shader_present & kbdev->pm.debug_core_mask_all; - return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & - kbdev->pm.debug_core_mask_all; +#ifdef CONFIG_MALI_DEVFREQ + return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; +#else + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; +#endif } KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); -void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, - u64 cores_transitioning) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbdev->pm.backend.ca_current_policy != NULL) - kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, - cores_ready, - cores_transitioning); -} - void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.instr_enabled = true; - - kbase_pm_update_cores_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); kbdev->pm.backend.instr_enabled = false; - - kbase_pm_update_cores_state_nolock(kbdev); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h index 2b005c9fe4e3..274581d0393a 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c deleted file mode 100755 index 4bb4c400efe7..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * A core availability policy implementing core mask selection from devfreq OPPs - * - */ - -#include -#include -#include -#include - -void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) -{ - struct kbasep_pm_ca_policy_devfreq *data = - &kbdev->pm.backend.ca_policy_data.devfreq; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - data->cores_desired = core_mask; - - /* Disable any cores that are now unwanted */ - data->cores_enabled &= data->cores_desired; - - kbdev->pm.backend.ca_in_transition = true; - - /* If there are no cores to be powered off then power on desired cores - */ - if (!(data->cores_used & ~data->cores_desired)) { - data->cores_enabled = data->cores_desired; - kbdev->pm.backend.ca_in_transition = false; - } - - kbase_pm_update_cores_state_nolock(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX %llX\n", - data->cores_desired, data->cores_enabled); -} - -static void devfreq_init(struct kbase_device *kbdev) -{ - struct kbasep_pm_ca_policy_devfreq *data = - &kbdev->pm.backend.ca_policy_data.devfreq; - - if (kbdev->current_core_mask) { - data->cores_enabled = kbdev->current_core_mask; - data->cores_desired = kbdev->current_core_mask; - } else { - data->cores_enabled = - kbdev->gpu_props.props.raw_props.shader_present; - data->cores_desired = - kbdev->gpu_props.props.raw_props.shader_present; - } - data->cores_used = 0; - kbdev->pm.backend.ca_in_transition = false; -} - -static void devfreq_term(struct kbase_device *kbdev) -{ -} - -static u64 devfreq_get_core_mask(struct kbase_device *kbdev) -{ - return kbdev->pm.backend.ca_policy_data.devfreq.cores_enabled; -} - -static void devfreq_update_core_status(struct kbase_device *kbdev, - u64 cores_ready, - u64 cores_transitioning) -{ - struct kbasep_pm_ca_policy_devfreq *data = - &kbdev->pm.backend.ca_policy_data.devfreq; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - data->cores_used = cores_ready | cores_transitioning; - - /* If in desired state then clear transition flag */ - if (data->cores_enabled == data->cores_desired) - kbdev->pm.backend.ca_in_transition = false; - - /* If all undesired cores are now off then power on desired cores. - * The direct comparison against cores_enabled limits potential - * recursion to one level */ - if (!(data->cores_used & ~data->cores_desired) && - data->cores_enabled != data->cores_desired) { - data->cores_enabled = data->cores_desired; - - kbase_pm_update_cores_state_nolock(kbdev); - - kbdev->pm.backend.ca_in_transition = false; - } -} - -/* - * The struct kbase_pm_ca_policy structure for the devfreq core availability - * policy. 
- * - * This is the static structure that defines the devfreq core availability power - * policy's callback and name. - */ -const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops = { - "devfreq", /* name */ - devfreq_init, /* init */ - devfreq_term, /* term */ - devfreq_get_core_mask, /* get_core_mask */ - devfreq_update_core_status, /* update_core_status */ - 0u, /* flags */ - KBASE_PM_CA_POLICY_ID_DEVFREQ, /* id */ -}; - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c deleted file mode 100755 index 1eea7e877f61..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * A power policy implementing fixed core availability - */ - -#include -#include - -static void fixed_init(struct kbase_device *kbdev) -{ - kbdev->pm.backend.ca_in_transition = false; -} - -static void fixed_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static u64 fixed_get_core_mask(struct kbase_device *kbdev) -{ - return kbdev->gpu_props.props.raw_props.shader_present; -} - -static void fixed_update_core_status(struct kbase_device *kbdev, - u64 cores_ready, - u64 cores_transitioning) -{ - CSTD_UNUSED(kbdev); - CSTD_UNUSED(cores_ready); - CSTD_UNUSED(cores_transitioning); -} - -/* - * The struct kbase_pm_policy structure for the fixed power policy. - * - * This is the static structure that defines the fixed power policy's callback - * and name. - */ -const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = { - "fixed", /* name */ - fixed_init, /* init */ - fixed_term, /* term */ - fixed_get_core_mask, /* get_core_mask */ - fixed_update_core_status, /* update_core_status */ - 0u, /* flags */ - KBASE_PM_CA_POLICY_ID_FIXED, /* id */ -}; - -KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h deleted file mode 100755 index 68a2eac4a121..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * A power policy implementing fixed core availability - */ - -#ifndef MALI_KBASE_PM_CA_FIXED_H -#define MALI_KBASE_PM_CA_FIXED_H - -/** - * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data - * - * @dummy: Dummy member - no state is needed - * - * This contains data that is private to the particular power policy that is - * active. - */ -struct kbasep_pm_ca_policy_fixed { - int dummy; -}; - -extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops; - -#endif /* MALI_KBASE_PM_CA_FIXED_H */ - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c index 602e175dbbb9..e90c44def25e 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,22 +29,14 @@ #include #include -static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) +static bool coarse_demand_shaders_needed(struct kbase_device *kbdev) { - if (kbdev->pm.active_count == 0) - return 0; - - return kbdev->gpu_props.props.raw_props.shader_present; + return kbase_pm_is_active(kbdev); } static bool coarse_demand_get_core_active(struct kbase_device *kbdev) { - if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt - && !kbdev->tiler_inuse_cnt) - return false; - - return true; + return kbase_pm_is_active(kbdev); } static void coarse_demand_init(struct kbase_device *kbdev) @@ -66,7 +58,7 @@ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { "coarse_demand", /* name */ coarse_demand_init, /* init */ coarse_demand_term, /* term */ - coarse_demand_get_core_mask, /* get_core_mask */ + coarse_demand_shaders_needed, /* shaders_needed */ coarse_demand_get_core_active, /* get_core_active */ 0u, /* flags */ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h index f2b49eb4bcac..304e5d7fa32d 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,11 +35,11 @@ * characteristics: * - When KBase indicates that the GPU will be powered up, but we don't yet * know which Job Chains are to be run: - * - All Shader Cores are powered up, regardless of whether or not they will - * be needed later. 
- * - When KBase indicates that a set of Shader Cores are needed to submit the - * currently queued Job Chains: - * - All Shader Cores are kept powered, regardless of whether or not they will + * - Shader Cores are powered up, regardless of whether or not they will be + * needed later. + * - When KBase indicates that Shader Cores are needed to submit the currently + * queued Job Chains: + * - Shader Cores are kept powered, regardless of whether or not they will * be needed * - When KBase indicates that the GPU need not be powered: * - The Shader Cores are powered off, and the GPU itself is powered off too. diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index 417f6f896eff..0cff22e19d99 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -27,18 +27,10 @@ #ifndef _KBASE_PM_HWACCESS_DEFS_H_ #define _KBASE_PM_HWACCESS_DEFS_H_ -#include "mali_kbase_pm_ca_fixed.h" -#include "mali_kbase_pm_ca_devfreq.h" -#if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_pm_ca_random.h" -#endif - #include "mali_kbase_pm_always_on.h" #include "mali_kbase_pm_coarse_demand.h" -#include "mali_kbase_pm_demand.h" #if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_pm_demand_always_powered.h" -#include "mali_kbase_pm_fast_start.h" +#include "mali_kbase_pm_always_on_demand.h" #endif /* Forward definition - see mali_kbase.h */ @@ -70,6 +62,70 @@ enum kbase_pm_core_type { KBASE_PM_CORE_STACK = STACK_PRESENT_LO }; +/** + * enum kbase_l2_core_state - The states used for the L2 cache & tiler power + * state machine. + * + * @KBASE_L2_OFF: The L2 cache and tiler are off + * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on + * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being + * enabled + * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled + * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being + * disabled + * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off + * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off + * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state + * are unknown + */ +enum kbase_l2_core_state { + KBASE_L2_OFF = 0, + KBASE_L2_PEND_ON, + KBASE_L2_ON_HWCNT_ENABLE, + KBASE_L2_ON, + KBASE_L2_ON_HWCNT_DISABLE, + KBASE_L2_POWER_DOWN, + KBASE_L2_PEND_OFF, + KBASE_L2_RESET_WAIT +}; + +/** + * enum kbase_shader_core_state - The states used for the shaders' state machine. 
+ * + * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have + * been requested to power on + * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on + * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on + * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to + * power off, but they remain on for the + * duration of the hysteresis timer + * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are + * off, but the tick timer + * cancellation is still + * pending. + * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power + * states are unknown + */ +enum kbase_shader_core_state { + KBASE_SHADERS_OFF_CORESTACK_OFF = 0, + KBASE_SHADERS_OFF_CORESTACK_PEND_ON, + KBASE_SHADERS_PEND_ON_CORESTACK_ON, + KBASE_SHADERS_ON_CORESTACK_ON, + KBASE_SHADERS_WAIT_OFF_CORESTACK_ON, + KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON, + KBASE_SHADERS_PEND_OFF_CORESTACK_ON, + KBASE_SHADERS_OFF_CORESTACK_PEND_OFF, + KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF, + KBASE_SHADERS_RESET_WAIT +}; + /** * struct kbasep_pm_metrics - Metrics data collected for use by the power * management framework. @@ -134,21 +190,39 @@ struct kbasep_pm_metrics_state { #endif }; +/** + * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer + * @wq: Work queue to wait for the timer to stopped + * @work: Work item which cancels the timer + * @timer: Timer for powering off the shader cores + * @configured_interval: Period of GPU poweroff timer + * @configured_ticks: User-configured number of ticks to wait after the shader + * power down request is received before turning off the cores + * @remaining_ticks: Number of remaining timer ticks until shaders are powered off + * @cancel_queued: True if the cancellation work item has been queued. This is + * required to ensure that it is not queued twice, e.g. after + * a reset, which could cause the timer to be incorrectly + * cancelled later by a delayed workitem. + * @needed: Whether the timer should restart itself + */ +struct kbasep_pm_tick_timer_state { + struct workqueue_struct *wq; + struct work_struct work; + struct hrtimer timer; + + ktime_t configured_interval; + unsigned int configured_ticks; + unsigned int remaining_ticks; + + bool cancel_queued; + bool needed; +}; + union kbase_pm_policy_data { struct kbasep_pm_policy_always_on always_on; struct kbasep_pm_policy_coarse_demand coarse_demand; - struct kbasep_pm_policy_demand demand; #if !MALI_CUSTOMER_RELEASE - struct kbasep_pm_policy_demand_always_powered demand_always_powered; - struct kbasep_pm_policy_fast_start fast_start; -#endif -}; - -union kbase_pm_ca_policy_data { - struct kbasep_pm_ca_policy_fixed fixed; - struct kbasep_pm_ca_policy_devfreq devfreq; -#if !MALI_CUSTOMER_RELEASE - struct kbasep_pm_ca_policy_random random; + struct kbasep_pm_policy_always_on_demand always_on_demand; #endif }; @@ -158,45 +232,17 @@ union kbase_pm_ca_policy_data { * This structure contains data for the power management framework. There is one * instance of this structure per device in the system. 
* - * @ca_current_policy: The policy that is currently actively controlling core - * availability. * @pm_current_policy: The policy that is currently actively controlling the * power state. - * @ca_policy_data: Private data for current CA policy * @pm_policy_data: Private data for current PM policy - * @ca_in_transition: Flag indicating when core availability policy is - * transitioning cores. The core availability policy must - * set this when a change in core availability is occurring. - * power_change_lock must be held when accessing this. * @reset_done: Flag when a reset is complete * @reset_done_wait: Wait queue to wait for changes to @reset_done - * @l2_powered_wait: Wait queue for whether the l2 cache has been powered as - * requested - * @l2_powered: State indicating whether all the l2 caches are powered. - * Non-zero indicates they're *all* powered - * Zero indicates that some (or all) are not powered * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter * users * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests - * @desired_shader_state: A bit mask identifying the shader cores that the - * power policy would like to be on. The current state - * of the cores may be different, but there should be - * transitions in progress that will eventually achieve - * this state (assuming that the policy doesn't change - * its mind in the mean time). - * @powering_on_shader_state: A bit mask indicating which shader cores are - * currently in a power-on transition - * @desired_tiler_state: A bit mask identifying the tiler cores that the power - * policy would like to be on. See @desired_shader_state - * @powering_on_tiler_state: A bit mask indicating which tiler core are - * currently in a power-on transition - * @powering_on_l2_state: A bit mask indicating which l2-caches are currently - * in a power-on transition - * @powering_on_stack_state: A bit mask indicating which core stacks are - * currently in a power-on transition - * @gpu_in_desired_state: This flag is set if the GPU is powered as requested - * by the desired_xxx_state variables - * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0 + * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired + * state according to the L2 and shader power state + * machines * @gpu_powered: Set to true when the GPU is powered and register * accesses are possible, false otherwise * @instr_enabled: Set to true when instrumentation is enabled, @@ -209,26 +255,12 @@ union kbase_pm_ca_policy_data { * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or * accessing @driver_ready_for_irqs * @metrics: Structure to hold metrics for the GPU - * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is - * powered off - * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders - * and/or timers are powered off - * @gpu_poweroff_timer: Timer for powering off GPU - * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires - * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq - * @shader_poweroff_pending: Bit mask of shaders to be powered off on next - * timer callback - * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer - * callback - * @poweroff_timer_needed: true if the poweroff timer is currently required, - * false otherwise - * @poweroff_timer_running: true if the poweroff timer is currently running, - * false otherwise - * power_change_lock should be held when 
accessing, - * unless there is no way the timer can be running (eg - * hrtimer_cancel() was called immediately before) + * @shader_tick_timer: Structure to hold the shader poweroff tick timer state * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. * hwaccess_lock must be held when accessing + * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state + * machine should invoke the poweroff + * worker after the L2 has turned off. * @poweron_required: true if a GPU power on is required. Should only be set * when poweroff_wait_in_progress is true, and therefore the * GPU can not immediately be powered on. pm.lock must be @@ -252,39 +284,50 @@ union kbase_pm_ca_policy_data { * &struct kbase_pm_callback_conf * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See * &struct kbase_pm_callback_conf + * @ca_cores_enabled: Cores that are currently available + * @l2_state: The current state of the L2 cache state machine. See + * &enum kbase_l2_core_state + * @l2_desired: True if the L2 cache should be powered on by the L2 cache state + * machine + * @shaders_state: The current state of the shader state machine. + * @shaders_avail: This is updated by the state machine when it is in a state + * where it can handle changes to the core availability. This + * is internal to the shader state machine and should *not* be + * modified elsewhere. + * @shaders_desired: True if the PM active count or power policy requires the + * shader cores to be on. This is used as an input to the + * shader power state machine. The current state of the + * cores may be different, but there should be transitions in + * progress that will eventually achieve this state (assuming + * that the policy doesn't change its mind in the mean time). + * @in_reset: True if a GPU is resetting and normal power manager operation is + * suspended + * @protected_transition_override : True if a protected mode transition is in + * progress and is overriding power manager + * behaviour. + * @protected_l2_override : Non-zero if the L2 cache is required during a + * protected mode transition. Has no effect if not + * transitioning. + * @hwcnt_desired: True if we want GPU hardware counters to be enabled. + * @hwcnt_disabled: True if GPU hardware counters are not enabled. + * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if + * atomic disable is not possible. * * Note: - * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the - * policy is being changed with kbase_pm_ca_set_policy() or - * kbase_pm_set_policy(). The change is protected under - * kbase_device.pm.power_change_lock. Direct access to this - * from IRQ context must therefore check for NULL. If NULL, then - * kbase_pm_ca_set_policy() or kbase_pm_set_policy() will re-issue the policy - * functions that would have been done under IRQ. + * During an IRQ, @pm_current_policy can be NULL when the policy is being + * changed with kbase_pm_set_policy(). The change is protected under + * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context + * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will + * re-issue the policy functions that would have been done under IRQ. 
*/ struct kbase_pm_backend_data { - const struct kbase_pm_ca_policy *ca_current_policy; const struct kbase_pm_policy *pm_current_policy; - union kbase_pm_ca_policy_data ca_policy_data; union kbase_pm_policy_data pm_policy_data; - bool ca_in_transition; bool reset_done; wait_queue_head_t reset_done_wait; - wait_queue_head_t l2_powered_wait; - int l2_powered; int gpu_cycle_counter_requests; spinlock_t gpu_cycle_counter_requests_lock; - u64 desired_shader_state; - u64 powering_on_shader_state; - u64 desired_tiler_state; - u64 powering_on_tiler_state; - u64 powering_on_l2_state; -#ifdef CONFIG_MALI_CORESTACK - u64 powering_on_stack_state; -#endif /* CONFIG_MALI_CORESTACK */ - - bool gpu_in_desired_state; wait_queue_head_t gpu_in_desired_state_wait; bool gpu_powered; @@ -299,23 +342,12 @@ struct kbase_pm_backend_data { spinlock_t gpu_powered_lock; - struct kbasep_pm_metrics_state metrics; - int gpu_poweroff_pending; - int shader_poweroff_pending_time; - - struct hrtimer gpu_poweroff_timer; - struct workqueue_struct *gpu_poweroff_wq; - struct work_struct gpu_poweroff_work; - - u64 shader_poweroff_pending; - u64 tiler_poweroff_pending; - - bool poweroff_timer_needed; - bool poweroff_timer_running; + struct kbasep_pm_tick_timer_state shader_tick_timer; bool poweroff_wait_in_progress; + bool invoke_poweroff_wait_wq_when_l2_off; bool poweron_required; bool poweroff_is_suspend; @@ -331,22 +363,39 @@ struct kbase_pm_backend_data { int (*callback_power_runtime_on)(struct kbase_device *kbdev); void (*callback_power_runtime_off)(struct kbase_device *kbdev); int (*callback_power_runtime_idle)(struct kbase_device *kbdev); + + u64 ca_cores_enabled; + + enum kbase_l2_core_state l2_state; + enum kbase_shader_core_state shaders_state; + u64 shaders_avail; + bool l2_desired; + bool shaders_desired; + + bool in_reset; + + bool protected_transition_override; + int protected_l2_override; + + bool hwcnt_desired; + bool hwcnt_disabled; + struct work_struct hwcnt_disable_work; }; /* List of policy IDs */ enum kbase_pm_policy_id { - KBASE_PM_POLICY_ID_DEMAND = 1, - KBASE_PM_POLICY_ID_ALWAYS_ON, KBASE_PM_POLICY_ID_COARSE_DEMAND, #if !MALI_CUSTOMER_RELEASE - KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, - KBASE_PM_POLICY_ID_FAST_START + KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, #endif + KBASE_PM_POLICY_ID_ALWAYS_ON }; typedef u32 kbase_pm_policy_flags; +#define KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u) + /** * struct kbase_pm_policy - Power policy structure. * @@ -356,7 +405,7 @@ typedef u32 kbase_pm_policy_flags; * @name: The name of this policy * @init: Function called when the policy is selected * @term: Function called when the policy is unselected - * @get_core_mask: Function called to get the current shader core mask + * @shaders_needed: Function called to find out if shader cores are needed * @get_core_active: Function called to get the current overall GPU power * state * @flags: Field indicating flags for this policy @@ -391,26 +440,23 @@ struct kbase_pm_policy { void (*term)(struct kbase_device *kbdev); /** - * Function called to get the current shader core mask + * Function called to find out if shader cores are needed * - * The returned mask should meet or exceed (kbdev->shader_needed_bitmap - * | kbdev->shader_inuse_bitmap). + * This needs to at least satisfy kbdev->pm.backend.shaders_desired, + * and so must never return false when shaders_desired is true. 
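(Illustrative sketch, not part of the patch: a minimal power policy written against the reworked callbacks documented above, modelled on the coarse_demand implementation earlier in this diff. The "sketch" name, the reuse of KBASE_PM_POLICY_ID_COARSE_DEMAND for the id field, and the omission of the policy-list registration step are assumptions made purely for illustration.)

static void sketch_policy_init(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);	/* no per-policy state to set up */
}

static void sketch_policy_term(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
}

/* Mirror coarse_demand: request the shaders exactly while the GPU has
 * active users, which satisfies the shaders_desired requirement above. */
static bool sketch_policy_shaders_needed(struct kbase_device *kbdev)
{
	return kbase_pm_is_active(kbdev);
}

/* Keep the GPU itself powered for as long as kbase_pm_is_active() reports
 * an active context. */
static bool sketch_policy_get_core_active(struct kbase_device *kbdev)
{
	return kbase_pm_is_active(kbdev);
}

const struct kbase_pm_policy sketch_policy_ops = {
	"sketch",			/* name */
	sketch_policy_init,		/* init */
	sketch_policy_term,		/* term */
	sketch_policy_shaders_needed,	/* shaders_needed */
	sketch_policy_get_core_active,	/* get_core_active */
	0u,				/* flags */
	KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id -- a real policy would add its own enum value */
};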
* * @kbdev: The kbase device structure for the device (must be a * valid pointer) * - * Return: The mask of shader cores to be powered + * Return: true if shader cores are needed, false otherwise */ - u64 (*get_core_mask)(struct kbase_device *kbdev); + bool (*shaders_needed)(struct kbase_device *kbdev); /** * Function called to get the current overall GPU power state * - * This function should consider the state of kbdev->pm.active_count. If - * this count is greater than 0 then there is at least one active - * context on the device and the GPU should be powered. If it is equal - * to 0 then there are no active contexts and the GPU could be powered - * off if desired. + * This function must meet or exceed the requirements for power + * indicated by kbase_pm_is_active(). * * @kbdev: The kbase device structure for the device (must be a * valid pointer) @@ -423,111 +469,4 @@ struct kbase_pm_policy { enum kbase_pm_policy_id id; }; - -enum kbase_pm_ca_policy_id { - KBASE_PM_CA_POLICY_ID_FIXED = 1, - KBASE_PM_CA_POLICY_ID_DEVFREQ, - KBASE_PM_CA_POLICY_ID_RANDOM -}; - -typedef u32 kbase_pm_ca_policy_flags; - -/** - * Maximum length of a CA policy names - */ -#define KBASE_PM_CA_MAX_POLICY_NAME_LEN 15 - -/** - * struct kbase_pm_ca_policy - Core availability policy structure. - * - * Each core availability policy exposes a (static) instance of this structure - * which contains function pointers to the policy's methods. - * - * @name: The name of this policy - * @init: Function called when the policy is selected - * @term: Function called when the policy is unselected - * @get_core_mask: Function called to get the current shader core - * availability mask - * @update_core_status: Function called to update the current core status - * @flags: Field indicating flags for this policy - * @id: Field indicating an ID for this policy. This is not - * necessarily the same as its index in the list returned - * by kbase_pm_list_policies(). - * It is used purely for debugging. - */ -struct kbase_pm_ca_policy { - char name[KBASE_PM_CA_MAX_POLICY_NAME_LEN + 1]; - - /** - * Function called when the policy is selected - * - * This should initialize the kbdev->pm.ca_policy_data structure. It - * should not attempt to make any changes to hardware state. - * - * It is undefined what state the cores are in when the function is - * called. - * - * @kbdev The kbase device structure for the device (must be a - * valid pointer) - */ - void (*init)(struct kbase_device *kbdev); - - /** - * Function called when the policy is unselected. - * - * @kbdev The kbase device structure for the device (must be a - * valid pointer) - */ - void (*term)(struct kbase_device *kbdev); - - /** - * Function called to get the current shader core availability mask - * - * When a change in core availability is occurring, the policy must set - * kbdev->pm.ca_in_transition to true. This is to indicate that - * reporting changes in power state cannot be optimized out, even if - * kbdev->pm.desired_shader_state remains unchanged. This must be done - * by any functions internal to the Core Availability Policy that change - * the return value of kbase_pm_ca_policy::get_core_mask. 
- * - * @kbdev The kbase device structure for the device (must be a - * valid pointer) - * - * Return: The current core availability mask - */ - u64 (*get_core_mask)(struct kbase_device *kbdev); - - /** - * Function called to update the current core status - * - * If none of the cores in core group 0 are ready or transitioning, then - * the policy must ensure that the next call to get_core_mask does not - * return 0 for all cores in core group 0. It is an error to disable - * core group 0 through the core availability policy. - * - * When a change in core availability has finished, the policy must set - * kbdev->pm.ca_in_transition to false. This is to indicate that - * changes in power state can once again be optimized out when - * kbdev->pm.desired_shader_state is unchanged. - * - * @kbdev: The kbase device structure for the device - * (must be a valid pointer) - * @cores_ready: The mask of cores currently powered and - * ready to run jobs - * @cores_transitioning: The mask of cores currently transitioning - * power state - */ - void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, - u64 cores_transitioning); - - kbase_pm_ca_policy_flags flags; - - /** - * Field indicating an ID for this policy. This is not necessarily the - * same as its index in the list returned by kbase_pm_list_policies(). - * It is used purely for debugging. - */ - enum kbase_pm_ca_policy_id id; -}; - #endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c deleted file mode 100755 index e0edddc2504d..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * A simple demand based power management policy - */ - -#include -#include - -static u64 demand_get_core_mask(struct kbase_device *kbdev) -{ - u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; - - if (0 == kbdev->pm.active_count) - return 0; - - return desired; -} - -static bool demand_get_core_active(struct kbase_device *kbdev) -{ - if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt - && !kbdev->tiler_inuse_cnt) - return false; - - return true; -} - -static void demand_init(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static void demand_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -/* - * The struct kbase_pm_policy structure for the demand power policy. - * - * This is the static structure that defines the demand power policy's callback - * and name. 
- */ -const struct kbase_pm_policy kbase_pm_demand_policy_ops = { - "demand", /* name */ - demand_init, /* init */ - demand_term, /* term */ - demand_get_core_mask, /* get_core_mask */ - demand_get_core_active, /* get_core_active */ - 0u, /* flags */ - KBASE_PM_POLICY_ID_DEMAND, /* id */ -}; - -KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h deleted file mode 100755 index 5ee182463bd4..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * A simple demand based power management policy - */ - -#ifndef MALI_KBASE_PM_DEMAND_H -#define MALI_KBASE_PM_DEMAND_H - -/** - * DOC: Demand power management policy - * - * The demand power management policy has the following characteristics: - * - When KBase indicates that the GPU will be powered up, but we don't yet - * know which Job Chains are to be run: - * - The Shader Cores are not powered up - * - * - When KBase indicates that a set of Shader Cores are needed to submit the - * currently queued Job Chains: - * - Only those Shader Cores are powered up - * - * - When KBase indicates that the GPU need not be powered: - * - The Shader Cores are powered off, and the GPU itself is powered off too. - * - * Note: - * - KBase indicates the GPU will be powered up when it has a User Process that - * has just started to submit Job Chains. - * - * - KBase indicates the GPU need not be powered when all the Job Chains from - * User Processes have finished, and it is waiting for a User Process to - * submit some more Job Chains. - */ - -/** - * struct kbasep_pm_policy_demand - Private structure for policy instance data - * - * @dummy: No state is needed, a dummy variable - * - * This contains data that is private to the demand power policy. - */ -struct kbasep_pm_policy_demand { - int dummy; -}; - -extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; - -#endif /* MALI_KBASE_PM_DEMAND_H */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 44803abee574..2e6599a0a5c3 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,15 +29,14 @@ #include #include #include -#if defined(CONFIG_MALI_GATOR_SUPPORT) #include -#endif #include #include #include #include #include #include +#include #include #include #include @@ -45,11 +44,23 @@ #include -#if MALI_MOCK_TEST -#define MOCKABLE(function) function##_original +#ifdef CONFIG_MALI_CORESTACK +bool corestack_driver_control = true; #else -#define MOCKABLE(function) function -#endif /* MALI_MOCK_TEST */ +bool corestack_driver_control; /* Default value of 0/false */ +#endif +module_param(corestack_driver_control, bool, 0000); +MODULE_PARM_DESC(corestack_driver_control, + "Let the driver power on/off the GPU core stack independently " + "without involving the Power Domain Controller. This should " + "only be enabled on platforms for which integration of the PDC " + "to the Mali GPU is known to be problematic."); +KBASE_EXPORT_TEST_API(corestack_driver_control); + +bool platform_power_down_only = PLATFORM_POWER_DOWN_ONLY; +module_param(platform_power_down_only, bool, 0000); +MODULE_PARM_DESC(platform_power_down_only, + "Disable power down of individual cores."); /** * enum kbasep_pm_action - Actions that can be performed on a core. @@ -79,6 +90,47 @@ static u64 kbase_pm_get_state( enum kbase_pm_core_type core_type, enum kbasep_pm_action action); +static bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) +{ + if (kbdev->pm.backend.protected_transition_override && + kbdev->pm.backend.protected_l2_override) + return true; + + if (kbdev->pm.backend.protected_transition_override && + !kbdev->pm.backend.shaders_desired) + return false; + + return kbdev->pm.backend.l2_desired; +} + +void kbase_pm_protected_override_enable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = true; +} +void kbase_pm_protected_override_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = false; +} + +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (override) { + kbdev->pm.backend.protected_l2_override++; + WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); + } else { + kbdev->pm.backend.protected_l2_override--; + WARN_ON(kbdev->pm.backend.protected_l2_override < 0); + } + + kbase_pm_update_state(kbdev); +} + /** * core_type_to_reg - Decode a core type and action to a register. 
* @@ -96,24 +148,24 @@ static u64 kbase_pm_get_state( static u32 core_type_to_reg(enum kbase_pm_core_type core_type, enum kbasep_pm_action action) { -#ifdef CONFIG_MALI_CORESTACK - if (core_type == KBASE_PM_CORE_STACK) { - switch (action) { - case ACTION_PRESENT: - return STACK_PRESENT_LO; - case ACTION_READY: - return STACK_READY_LO; - case ACTION_PWRON: - return STACK_PWRON_LO; - case ACTION_PWROFF: - return STACK_PWROFF_LO; - case ACTION_PWRTRANS: - return STACK_PWRTRANS_LO; - default: - BUG(); + if (corestack_driver_control) { + if (core_type == KBASE_PM_CORE_STACK) { + switch (action) { + case ACTION_PRESENT: + return STACK_PRESENT_LO; + case ACTION_READY: + return STACK_READY_LO; + case ACTION_PWRON: + return STACK_PWRON_LO; + case ACTION_PWROFF: + return STACK_PWROFF_LO; + case ACTION_PWRTRANS: + return STACK_PWRTRANS_LO; + default: + WARN(1, "Invalid action for core type\n"); + } } } -#endif /* CONFIG_MALI_CORESTACK */ return (u32)core_type + (u32)action; } @@ -135,19 +187,16 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES, - NULL); + GPU_COMMAND_CLEAN_INV_CACHES); raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - NULL); + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); /* Wait for cache flush to complete before continuing, exit on * gpu resets or loop expiry. */ while (((raw & mask) == 0) && --loops) { raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - NULL); + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); } } #endif @@ -173,6 +222,12 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, u32 lo = cores & 0xFFFFFFFF; u32 hi = (cores >> 32) & 0xFFFFFFFF; + /* When 'platform_power_down_only' is enabled, no core type should be + * turned off individually. + */ + KBASE_DEBUG_ASSERT(!(action == ACTION_PWROFF && + platform_power_down_only)); + lockdep_assert_held(&kbdev->hwaccess_lock); reg = core_type_to_reg(core_type, action); @@ -238,10 +293,10 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, } if (lo != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); if (hi != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); } /** @@ -269,26 +324,12 @@ static u64 kbase_pm_get_state(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(reg); - lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL); - hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL); + lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); + hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); return (((u64) hi) << 32) | ((u64) lo); } -void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev) -{ - kbdev->shader_inuse_bitmap = 0; - kbdev->shader_needed_bitmap = 0; - kbdev->shader_available_bitmap = 0; - kbdev->tiler_available_bitmap = 0; - kbdev->l2_users_count = 0; - kbdev->l2_available_bitmap = 0; - kbdev->tiler_needed_cnt = 0; - kbdev->tiler_inuse_cnt = 0; - - memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt)); -} - /** * kbase_pm_get_present_cores - Get the cores that are present * @@ -392,573 +433,776 @@ u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); -/** - * kbase_pm_transition_core_type - Perform power transitions for a particular - * core type. - * - * This function will perform any available power transitions to make the actual - * hardware state closer to the desired state. 
If a core is currently - * transitioning then changes to the power state of that call cannot be made - * until the transition has finished. Cores which are not present in the - * hardware are ignored if they are specified in the desired_state bitmask, - * however the return value will always be 0 in this case. - * - * @kbdev: The kbase device - * @type: The core type to perform transitions for - * @desired_state: A bit mask of the desired state of the cores - * @in_use: A bit mask of the cores that are currently running - * jobs. These cores have to be kept powered up because - * there are jobs running (or about to run) on them. - * @available: Receives a bit mask of the cores that the job - * scheduler can use to submit jobs to. May be NULL if - * this is not needed. - * @powering_on: Bit mask to update with cores that are - * transitioning to a power-on state. - * - * Return: true if the desired state has been reached, false otherwise - */ -static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, - enum kbase_pm_core_type type, - u64 desired_state, - u64 in_use, - u64 * const available, - u64 *powering_on) +static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) { - u64 present; - u64 ready; - u64 trans; - u64 powerup; - u64 powerdown; - u64 powering_on_trans; - u64 desired_state_in_use; + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; + enum kbase_l2_core_state prev_state; lockdep_assert_held(&kbdev->hwaccess_lock); - /* Get current state */ - present = kbase_pm_get_present_cores(kbdev, type); - trans = kbase_pm_get_trans_cores(kbdev, type); - ready = kbase_pm_get_ready_cores(kbdev, type); - /* mask off ready from trans in case transitions finished between the - * register reads */ - trans &= ~ready; + do { + /* Get current state */ + u64 l2_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2); + u64 l2_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2); + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER); + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + l2_trans &= ~l2_ready; + tiler_trans &= ~tiler_ready; + + prev_state = backend->l2_state; + + switch (backend->l2_state) { + case KBASE_L2_OFF: + if (kbase_pm_is_l2_desired(kbdev)) { + /* L2 is required, power on. Powering on the + * tiler will also power the first L2 cache. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); + + /* If we have more than one L2 cache then we + * must power them on explicitly. + */ + if (l2_present != 1) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present & ~1, + ACTION_PWRON); + backend->l2_state = KBASE_L2_PEND_ON; + } + break; - if (trans) /* Do not progress if any cores are transitioning */ - return false; + case KBASE_L2_PEND_ON: + if (!l2_trans && l2_ready == l2_present && !tiler_trans + && tiler_ready == tiler_present) { + KBASE_TRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, + (u32)tiler_ready); + /* + * Ensure snoops are enabled after L2 is powered + * up. Note that kbase keeps track of the snoop + * state, so safe to repeatedly call. + */ + kbase_pm_cache_snoop_enable(kbdev); + + /* With the L2 enabled, we can now enable + * hardware counters. 
+ */ + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + + /* Now that the L2 is on, the shaders can start + * powering on if they're required. The obvious + * way to do this would be to call + * kbase_pm_shaders_update_state() here. + * However, that would make the two state + * machines mutually recursive, as the opposite + * would be needed for powering down. Instead, + * callers of this function should use the + * kbase_pm_update_state() wrapper, which will + * call the shader state machine immediately + * after the L2 (for power up), or + * automatically re-invoke the L2 state machine + * when the shaders power down. + */ + } + break; + + case KBASE_L2_ON_HWCNT_ENABLE: + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + backend->l2_state = KBASE_L2_ON; + break; + + case KBASE_L2_ON: + if (!kbase_pm_is_l2_desired(kbdev)) { + /* Do not power off L2 until the shaders and + * core stacks are off. + */ + if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + break; + + /* We need to make sure hardware counters are + * disabled before powering down the L2, to + * prevent loss of data. + * + * We waited until after the cores were powered + * down to prevent ping-ponging between hwcnt + * enabled and disabled, which would have + * happened if userspace submitted more work + * while we were trying to power down. + */ + backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; + } + break; + + case KBASE_L2_ON_HWCNT_DISABLE: + /* If the L2 became desired while we were waiting on the + * worker to do the actual hwcnt disable (which might + * happen if some work was submitted immediately after + * the shaders powered off), then we need to early-out + * of this state and re-enable hwcnt. + * + * If we get lucky, the hwcnt disable might not have + * actually started yet, and the logic in the hwcnt + * enable state will prevent the worker from + * performing the disable entirely, preventing loss of + * any hardware counter data. + * + * If the hwcnt disable has started, then we'll lose + * a tiny amount of hardware counter data between the + * disable and the re-enable occurring. + * + * This loss of data is preferable to the alternative, + * which is to block the shader cores from doing any + * work until we're sure hwcnt has been re-enabled. + */ + if (kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + break; + } - powering_on_trans = trans & *powering_on; - *powering_on = powering_on_trans; + /* See if we can get away with disabling hwcnt + * atomically, otherwise kick off a worker. + */ + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + if (kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)) + backend->hwcnt_disabled = true; + else +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &backend->hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &backend->hwcnt_disable_work); +#endif + } - if (available != NULL) - *available = (ready | powering_on_trans) & desired_state; + if (backend->hwcnt_disabled) + backend->l2_state = KBASE_L2_POWER_DOWN; + break; + + case KBASE_L2_POWER_DOWN: + if (!platform_power_down_only) + /* Powering off the L2 will also power off the + * tiler. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present, + ACTION_PWROFF); + else + /* If L2 cache is powered then we must flush it + * before we power off the GPU. 
Normally this + * would have been handled when the L2 was + * powered off. + */ + kbase_gpu_start_cache_clean_nolock( + kbdev); - /* Update desired state to include the in-use cores. These have to be - * kept powered up because there are jobs running or about to run on - * these cores - */ - desired_state_in_use = desired_state | in_use; - - /* Update state of whether l2 caches are powered */ - if (type == KBASE_PM_CORE_L2) { - if ((ready == present) && (desired_state_in_use == ready) && - (trans == 0)) { - /* All are ready, none will be turned off, and none are - * transitioning */ - kbdev->pm.backend.l2_powered = 1; - /* - * Ensure snoops are enabled after L2 is powered up, - * note that kbase keeps track of the snoop state, so - * safe to repeatedly call. - */ - kbase_pm_cache_snoop_enable(kbdev); - if (kbdev->l2_users_count > 0) { - /* Notify any registered l2 cache users - * (optimized out when no users waiting) */ - wake_up(&kbdev->pm.backend.l2_powered_wait); + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, 0u); + + backend->l2_state = KBASE_L2_PEND_OFF; + break; + + case KBASE_L2_PEND_OFF: + if (!platform_power_down_only) { + /* We only need to check the L2 here - if the L2 + * is off then the tiler is definitely also off. + */ + if (!l2_trans && !l2_ready) + /* L2 is now powered off */ + backend->l2_state = KBASE_L2_OFF; + } else { + if (!kbdev->cache_clean_in_progress) + backend->l2_state = KBASE_L2_OFF; } - } else - kbdev->pm.backend.l2_powered = 0; - } + break; - if (desired_state == ready && (trans == 0)) - return true; + case KBASE_L2_RESET_WAIT: + if (!backend->in_reset) { + /* Reset complete */ + backend->l2_state = KBASE_L2_OFF; + } + break; - /* Restrict the cores to those that are actually present */ - powerup = desired_state_in_use & present; - powerdown = (~desired_state_in_use) & present; - - /* Restrict to cores that are not already in the desired state */ - powerup &= ~ready; - powerdown &= ready; - - /* Don't transition any cores that are already transitioning, except for - * Mali cores that support the following case: - * - * If the SHADER_PWRON or TILER_PWRON registers are written to turn on - * a core that is currently transitioning to power off, then this is - * remembered and the shader core is automatically powered up again once - * the original transition completes. Once the automatic power on is - * complete any job scheduled on the shader core should start. 
- */ - powerdown &= ~trans; + default: + WARN(1, "Invalid state in l2_state: %d", + backend->l2_state); + } + } while (backend->l2_state != prev_state); - if (kbase_hw_has_feature(kbdev, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) - if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) - trans = powering_on_trans; /* for exception cases, only - * mask off cores in power on - * transitions */ + if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && + backend->l2_state == KBASE_L2_OFF) { + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; + queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, + &kbdev->pm.backend.gpu_poweroff_wait_work); + } - powerup &= ~trans; + if (backend->l2_state == KBASE_L2_ON) + return l2_present; + return 0; +} - /* Perform transitions if any */ - kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); -#if !PLATFORM_POWER_DOWN_ONLY - kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); -#endif +static void shader_poweroff_timer_stop_callback(struct work_struct *data) +{ + unsigned long flags; + struct kbasep_pm_tick_timer_state *stt = container_of(data, + struct kbasep_pm_tick_timer_state, work); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); - /* Recalculate cores transitioning on, and re-evaluate our state */ - powering_on_trans |= powerup; - *powering_on = powering_on_trans; - if (available != NULL) - *available = (ready | powering_on_trans) & desired_state; + hrtimer_cancel(&stt->timer); - return false; -} + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); + stt->cancel_queued = false; + if (kbdev->pm.backend.gpu_powered) + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} /** - * get_desired_cache_status - Determine which caches should be on for a - * particular core state + * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer + * @kbdev: pointer to kbase device * - * This function takes a bit mask of the present caches and the cores (or - * caches) that are attached to the caches that will be powered. It then - * computes which caches should be turned on to allow the cores requested to be - * powered up. + * Synchronization between the shader state machine and the timer thread is + * difficult. This is because situations may arise where the state machine + * wants to start the timer, but the callback is already running, and has + * already passed the point at which it checks whether it is required, and so + * cancels itself, even though the state machine may have just tried to call + * hrtimer_start. * - * @present: The bit mask of present caches - * @cores_powered: A bit mask of cores (or L2 caches) that are desired to - * be powered - * @tilers_powered: The bit mask of tilers that are desired to be powered + * This cannot be stopped by holding hwaccess_lock in the timer thread, + * because there are still infinitesimally small sections at the start and end + * of the callback where the lock is not held. * - * Return: A bit mask of the caches that should be turned on + * Instead, a new state is added to the shader state machine, + * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee + * that when the shaders are switched off, the timer has definitely been + * cancelled. 
As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the + * timer is started, it is guaranteed that either the timer is already running + * (from an availability change or cancelled timer), or hrtimer_start will + * succeed. It is critical to avoid ending up in + * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could + * hang there forever. */ -static u64 get_desired_cache_status(u64 present, u64 cores_powered, - u64 tilers_powered) +static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) { - u64 desired = 0; + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; - while (present) { - /* Find out which is the highest set bit */ - u64 bit = fls64(present) - 1; - u64 bit_mask = 1ull << bit; - /* Create a mask which has all bits from 'bit' upwards set */ - - u64 mask = ~(bit_mask - 1); + lockdep_assert_held(&kbdev->hwaccess_lock); - /* If there are any cores powered at this bit or above (that - * haven't previously been processed) then we need this core on - */ - if (cores_powered & mask) - desired |= bit_mask; + stt->needed = false; - /* Remove bits from cores_powered and present */ - cores_powered &= ~mask; - present &= ~bit_mask; + if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { + stt->cancel_queued = true; + queue_work(stt->wq, &stt->work); } +} - /* Power up the required L2(s) for the tiler */ - if (tilers_powered) - desired |= 1; +static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; + enum kbase_shader_core_state prev_state; + u64 stacks_avail = 0; - return desired; -} + lockdep_assert_held(&kbdev->hwaccess_lock); -KBASE_EXPORT_TEST_API(get_desired_cache_status); + if (corestack_driver_control) + /* Always power on all the corestacks. Disabling certain + * corestacks when their respective shaders are not in the + * available bitmap is not currently supported. + */ + stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); -#ifdef CONFIG_MALI_CORESTACK -u64 kbase_pm_core_stack_mask(u64 cores) -{ - u64 stack_mask = 0; - size_t const MAX_CORE_ID = 31; - size_t const NUM_CORES_PER_STACK = 4; - size_t i; - - for (i = 0; i <= MAX_CORE_ID; ++i) { - if (test_bit(i, (unsigned long *)&cores)) { - /* Every core which ID >= 16 is filled to stacks 4-7 - * instead of 0-3 */ - size_t const stack_num = (i >= 16) ? 
- (i % NUM_CORES_PER_STACK) + 4 : - (i % NUM_CORES_PER_STACK); - set_bit(stack_num, (unsigned long *)&stack_mask); + do { + u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 stacks_trans = 0; + u64 stacks_ready = 0; + + if (corestack_driver_control) { + stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); + stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); } - } - return stack_mask; -} -#endif /* CONFIG_MALI_CORESTACK */ + /* mask off ready from trans in case transitions finished + * between the register reads + */ + shaders_trans &= ~shaders_ready; + stacks_trans &= ~stacks_ready; -bool -MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) -{ - bool cores_are_available = false; - bool in_desired_state = true; - u64 desired_l2_state; -#ifdef CONFIG_MALI_CORESTACK - u64 desired_stack_state; - u64 stacks_powered; -#endif /* CONFIG_MALI_CORESTACK */ - u64 cores_powered; - u64 tilers_powered; - u64 tiler_available_bitmap; - u64 tiler_transitioning_bitmap; - u64 shader_available_bitmap; - u64 shader_ready_bitmap; - u64 shader_transitioning_bitmap; - u64 l2_available_bitmap; - u64 prev_l2_available_bitmap; - u64 l2_inuse_bitmap; + prev_state = backend->shaders_state; - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->hwaccess_lock); + switch (backend->shaders_state) { + case KBASE_SHADERS_OFF_CORESTACK_OFF: + /* Ignore changes to the shader core availability + * except at certain points where we can handle it, + * i.e. off and SHADERS_ON_CORESTACK_ON. + */ + backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); - spin_lock(&kbdev->pm.backend.gpu_powered_lock); - if (kbdev->pm.backend.gpu_powered == false) { - spin_unlock(&kbdev->pm.backend.gpu_powered_lock); - if (kbdev->pm.backend.desired_shader_state == 0 && - kbdev->pm.backend.desired_tiler_state == 0) - return true; - return false; - } + if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) { + if (corestack_driver_control) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWRON); - /* Trace that a change-state is being requested, and that it took - * (effectively) no time to start it. 
This is useful for counting how - * many state changes occurred, in a way that's backwards-compatible - * with processing the trace data */ - kbase_timeline_pm_send_event(kbdev, - KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); - kbase_timeline_pm_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); - - /* If any cores are already powered then, we must keep the caches on */ - shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_SHADER); - cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); - cores_powered |= kbdev->pm.backend.desired_shader_state; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON; + } + break; -#ifdef CONFIG_MALI_CORESTACK - /* Work out which core stacks want to be powered */ - desired_stack_state = kbase_pm_core_stack_mask(cores_powered); - stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) | - desired_stack_state; -#endif /* CONFIG_MALI_CORESTACK */ - - /* Work out which tilers want to be powered */ - tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_TILER); - tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); - tilers_powered |= kbdev->pm.backend.desired_tiler_state; - - /* If there are l2 cache users registered, keep all l2s powered even if - * all other cores are off. */ - if (kbdev->l2_users_count > 0) - cores_powered |= kbdev->gpu_props.props.raw_props.l2_present; - - desired_l2_state = get_desired_cache_status( - kbdev->gpu_props.props.raw_props.l2_present, - cores_powered, tilers_powered); - - l2_inuse_bitmap = get_desired_cache_status( - kbdev->gpu_props.props.raw_props.l2_present, - cores_powered | shader_transitioning_bitmap, - tilers_powered | tiler_transitioning_bitmap); + case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: + if (!stacks_trans && stacks_ready == stacks_avail) { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail, ACTION_PWRON); -#ifdef CONFIG_MALI_CORESTACK - if (stacks_powered) - desired_l2_state |= 1; -#endif /* CONFIG_MALI_CORESTACK */ + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - /* If any l2 cache is on, then enable l2 #0, for use by job manager */ - if (0 != desired_l2_state) - desired_l2_state |= 1; + } + break; + + case KBASE_SHADERS_PEND_ON_CORESTACK_ON: + if (!shaders_trans && shaders_ready == backend->shaders_avail) { + KBASE_TRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE, + NULL, NULL, 0u, (u32)shaders_ready); + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON: + backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + + if (!backend->shaders_desired) { + if (kbdev->pm.backend.protected_transition_override || + !stt->configured_ticks || + WARN_ON(stt->cancel_queued)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } else { + stt->remaining_ticks = stt->configured_ticks; + stt->needed = true; + + /* The shader hysteresis timer is not + * done the obvious way, which would be + * to start an hrtimer when the shader + * power off is requested. Instead, + * use a 'tick' timer, and set the + * remaining number of ticks on a power + * off request. This avoids the + * latency of starting, then + * immediately cancelling an hrtimer + * when the shaders are re-requested + * before the timeout expires. 
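(Illustrative sketch, not part of the patch: the tick callback for the shader hysteresis timer lives outside this hunk, so the following only shows a plausible shape for it, using the kbasep_pm_tick_timer_state fields documented earlier in this diff; the function name and exact bookkeeping are assumptions.)

static enum hrtimer_restart sketch_shader_tick_timer_callback(struct hrtimer *timer)
{
	struct kbasep_pm_tick_timer_state *stt = container_of(timer,
			struct kbasep_pm_tick_timer_state, timer);
	struct kbase_device *kbdev = container_of(stt, struct kbase_device,
			pm.backend.shader_tick_timer);
	enum hrtimer_restart restart = HRTIMER_NORESTART;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	if (stt->remaining_ticks)
		stt->remaining_ticks--;

	/* @needed is documented as "whether the timer should restart itself";
	 * while it is set, run for another configured_interval. */
	if (stt->needed) {
		hrtimer_forward_now(timer, stt->configured_interval);
		restart = HRTIMER_RESTART;
	}

	/* Let the shader state machine observe remaining_ticks reaching zero
	 * and leave KBASE_SHADERS_WAIT_OFF_CORESTACK_ON towards power-off. */
	kbase_pm_update_state(kbdev);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return restart;
}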
+ */ + if (!hrtimer_active(&stt->timer)) + hrtimer_start(&stt->timer, + stt->configured_interval, + HRTIMER_MODE_REL); + + backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; + } + } else if (!platform_power_down_only) { + if (backend->shaders_avail & ~shaders_ready) { + backend->shaders_avail |= shaders_ready; + + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail & ~shaders_ready, + ACTION_PWRON); + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + + } + } + break; + + case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: + if (WARN_ON(!hrtimer_active(&stt->timer))) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } - prev_l2_available_bitmap = kbdev->l2_available_bitmap; - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap, - &l2_available_bitmap, - &kbdev->pm.backend.powering_on_l2_state); + if (backend->shaders_desired) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + } else if (stt->remaining_ticks == 0) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } + break; - if (kbdev->l2_available_bitmap != l2_available_bitmap) - KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap); + case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: + shader_poweroff_timer_queue_cancel(kbdev); - kbdev->l2_available_bitmap = l2_available_bitmap; + if (!platform_power_down_only) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready, ACTION_PWROFF); + KBASE_TRACE_ADD(kbdev, + PM_CORES_CHANGE_AVAILABLE, + NULL, NULL, 0u, 0u); -#ifdef CONFIG_MALI_CORESTACK - if (in_desired_state) { - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_STACK, desired_stack_state, 0, - &kbdev->stack_available_bitmap, - &kbdev->pm.backend.powering_on_stack_state); - } -#endif /* CONFIG_MALI_CORESTACK */ - - if (in_desired_state) { - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_TILER, - kbdev->pm.backend.desired_tiler_state, - 0, &tiler_available_bitmap, - &kbdev->pm.backend.powering_on_tiler_state); - in_desired_state &= kbase_pm_transition_core_type(kbdev, - KBASE_PM_CORE_SHADER, - kbdev->pm.backend.desired_shader_state, - kbdev->shader_inuse_bitmap, - &shader_available_bitmap, - &kbdev->pm.backend.powering_on_shader_state); - - if (kbdev->shader_available_bitmap != shader_available_bitmap) { - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, - (u32) shader_available_bitmap); - KBASE_TIMELINE_POWER_SHADER(kbdev, - shader_available_bitmap); - } + backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; + break; - kbdev->shader_available_bitmap = shader_available_bitmap; + case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: + if ((!shaders_trans && !shaders_ready) || platform_power_down_only) { + if (corestack_driver_control && !platform_power_down_only) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWROFF); - if (kbdev->tiler_available_bitmap != tiler_available_bitmap) { - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, - (u32) tiler_available_bitmap); - KBASE_TIMELINE_POWER_TILER(kbdev, - tiler_available_bitmap); + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: + if ((!stacks_trans && !stacks_ready) || platform_power_down_only) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + break; + + case 
KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: + if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + break; + + case KBASE_SHADERS_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + break; } + } while (backend->shaders_state != prev_state); +} - kbdev->tiler_available_bitmap = tiler_available_bitmap; +static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) +{ + bool in_desired_state = true; - } else if ((l2_available_bitmap & - kbdev->gpu_props.props.raw_props.tiler_present) != - kbdev->gpu_props.props.raw_props.tiler_present) { - tiler_available_bitmap = 0; + lockdep_assert_held(&kbdev->hwaccess_lock); - if (kbdev->tiler_available_bitmap != tiler_available_bitmap) - KBASE_TIMELINE_POWER_TILER(kbdev, - tiler_available_bitmap); + if (kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_ON) + in_desired_state = false; + else if (!kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_OFF) + in_desired_state = false; + + if (kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + in_desired_state = false; + else if (!kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + in_desired_state = false; + + return in_desired_state; +} - kbdev->tiler_available_bitmap = tiler_available_bitmap; - } +static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) +{ + bool in_desired_state; + unsigned long flags; - /* State updated for slow-path waiters */ - kbdev->pm.backend.gpu_in_desired_state = in_desired_state; - - shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_SHADER); - shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_SHADER); - - /* Determine whether the cores are now available (even if the set of - * available cores is empty). 
Note that they can be available even if - * we've not finished transitioning to the desired state */ - if ((kbdev->shader_available_bitmap & - kbdev->pm.backend.desired_shader_state) - == kbdev->pm.backend.desired_shader_state && - (kbdev->tiler_available_bitmap & - kbdev->pm.backend.desired_tiler_state) - == kbdev->pm.backend.desired_tiler_state) { - cores_are_available = true; - - KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u, - (u32)(kbdev->shader_available_bitmap & - kbdev->pm.backend.desired_shader_state)); - KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u, - (u32)(kbdev->tiler_available_bitmap & - kbdev->pm.backend.desired_tiler_state)); - - /* Log timelining information about handling events that power - * up cores, to match up either with immediate submission either - * because cores already available, or from PM IRQ */ - if (!in_desired_state) - kbase_timeline_pm_send_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - } + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state_with_l2_powered( + struct kbase_device *kbdev) +{ + bool in_desired_state = false; + unsigned long flags; - if (in_desired_state) { - KBASE_DEBUG_ASSERT(cores_are_available); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_pm_is_in_desired_state_nolock(kbdev) && + (kbdev->pm.backend.l2_state == KBASE_L2_ON)) + in_desired_state = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static void kbase_pm_trace_power_state(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); #if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_SHADER)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER)); -#ifdef CONFIG_MALI_CORESTACK + kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, + kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, + kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_SHADER)); + kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, + kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER)); + if (corestack_driver_control) kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK)); -#endif /* CONFIG_MALI_CORESTACK */ #endif - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_L2)); - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_SHADER)); - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_TILER)); -#ifdef CONFIG_MALI_CORESTACK + KBASE_TLSTREAM_AUX_PM_STATE( + KBASE_PM_CORE_L2, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_L2)); + KBASE_TLSTREAM_AUX_PM_STATE( + KBASE_PM_CORE_SHADER, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_SHADER)); + KBASE_TLSTREAM_AUX_PM_STATE( + KBASE_PM_CORE_TILER, + kbase_pm_get_ready_cores( + kbdev, + KBASE_PM_CORE_TILER)); + + if (corestack_driver_control) KBASE_TLSTREAM_AUX_PM_STATE( KBASE_PM_CORE_STACK, kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_STACK)); -#endif /* 
CONFIG_MALI_CORESTACK */ +} + +void kbase_pm_update_state(struct kbase_device *kbdev) +{ + enum kbase_shader_core_state prev_shaders_state = + kbdev->pm.backend.shaders_state; + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbdev->pm.backend.gpu_powered) + return; /* Do nothing if the GPU is off */ + + kbase_pm_l2_update_state(kbdev); + kbase_pm_shaders_update_state(kbdev); + + /* If the shaders just turned off, re-invoke the L2 state machine, in + * case it was waiting for the shaders to turn off before powering down + * the L2. + */ + if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && + kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) + kbase_pm_l2_update_state(kbdev); + + if (kbase_pm_is_in_desired_state_nolock(kbdev)) { KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, - kbdev->pm.backend.gpu_in_desired_state, - (u32)kbdev->pm.backend.desired_shader_state); - KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u, - (u32)kbdev->pm.backend.desired_tiler_state); - - /* Log timelining information for synchronous waiters */ - kbase_timeline_pm_send_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - /* Wake slow-path waiters. Job scheduler does not use this. */ - KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); + true, kbdev->pm.backend.shaders_avail); + + kbase_pm_trace_power_state(kbdev); + KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); } +} + +static enum hrtimer_restart +shader_tick_timer_callback(struct hrtimer *timer) +{ + struct kbasep_pm_tick_timer_state *stt = container_of(timer, + struct kbasep_pm_tick_timer_state, timer); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + enum hrtimer_restart restart = HRTIMER_NORESTART; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + if (stt->remaining_ticks && + backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { + stt->remaining_ticks--; - /* kbase_pm_ca_update_core_status can cause one-level recursion into - * this function, so it must only be called once all changes to kbdev - * have been committed, and after the gpu_powered_lock has been - * dropped. */ - if (kbdev->shader_ready_bitmap != shader_ready_bitmap || - kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { - kbdev->shader_ready_bitmap = shader_ready_bitmap; - kbdev->shader_transitioning_bitmap = - shader_transitioning_bitmap; + /* If the remaining ticks just changed from 1 to 0, invoke the + * PM state machine to power off the shader cores. 
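+		 *
+		 * Calling kbase_pm_update_state() here is safe: it
+		 * asserts that hwaccess_lock is held, and this callback
+		 * took that lock above.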
+ */ + if (!stt->remaining_ticks && !backend->shaders_desired) + kbase_pm_update_state(kbdev); + } - kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, - shader_transitioning_bitmap); + if (stt->needed) { + hrtimer_forward_now(timer, stt->configured_interval); + restart = HRTIMER_RESTART; } - /* The core availability policy is not allowed to keep core group 0 - * turned off (unless it was changing the l2 power state) */ - if (!((shader_ready_bitmap | shader_transitioning_bitmap) & - kbdev->gpu_props.props.coherency_info.group[0].core_mask) && - (prev_l2_available_bitmap == desired_l2_state) && - !(kbase_pm_ca_get_core_mask(kbdev) & - kbdev->gpu_props.props.coherency_info.group[0].core_mask)) - BUG(); - - /* The core availability policy is allowed to keep core group 1 off, - * but all jobs specifically targeting CG1 must fail */ - if (!((shader_ready_bitmap | shader_transitioning_bitmap) & - kbdev->gpu_props.props.coherency_info.group[1].core_mask) && - !(kbase_pm_ca_get_core_mask(kbdev) & - kbdev->gpu_props.props.coherency_info.group[1].core_mask)) - kbdev->pm.backend.cg1_disabled = true; - else - kbdev->pm.backend.cg1_disabled = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return restart; +} + +int kbase_pm_state_machine_init(struct kbase_device *kbdev) +{ + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; + + stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!stt->wq) + return -ENOMEM; + + INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); + + stt->needed = false; + hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stt->timer.function = shader_tick_timer_callback; + stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + + return 0; +} + +void kbase_pm_state_machine_term(struct kbase_device *kbdev) +{ + hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); + destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); +} + +void kbase_pm_reset_start_locked(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + backend->in_reset = true; + backend->l2_state = KBASE_L2_RESET_WAIT; + backend->shaders_state = KBASE_SHADERS_RESET_WAIT; + + /* We're in a reset, so hwcnt will have been synchronously disabled by + * this function's caller as part of the reset process. We therefore + * know that any call to kbase_hwcnt_context_disable_atomic, if + * required to sync the hwcnt refcount with our internal state, is + * guaranteed to succeed. + */ + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + WARN_ON(!kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)); + backend->hwcnt_disabled = true; + } + + shader_poweroff_timer_queue_cancel(kbdev); +} + +void kbase_pm_reset_complete(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; - return cores_are_available; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + backend->in_reset = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); -/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has +/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has * aborted due to a fatal signal. 
If the time spent waiting has exceeded this * threshold then there is most likely a hardware issue. */ #define PM_TIMEOUT (5*HZ) /* 5s */ -void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev) +{ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO))); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO))); + + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +} + +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) { unsigned long flags; unsigned long timeout; - bool cores_are_available; - int ret; + int err; - /* Force the transition to be checked and reported - the cores may be - * 'available' (for job submission) but not fully powered up. */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); +} - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + int err; - /* Don't need 'cores_are_available', because we don't return anything */ - CSTD_UNUSED(cores_are_available); + /* Let the state machine latch the most recent desired state. 
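+	 * This kicks the L2 and shader state machines under hwaccess_lock
+	 * so that any transitions needed to reach l2_desired /
+	 * shaders_desired are already in flight before waiting on
+	 * gpu_in_desired_state_wait below.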
*/ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); timeout = jiffies + PM_TIMEOUT; /* Wait for cores */ - ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, - kbdev->pm.backend.gpu_in_desired_state); - - if (ret < 0 && time_after(jiffies, timeout)) { - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); - dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, "\tShader=%016llx\n", - kbdev->pm.backend.desired_shader_state); - dev_err(kbdev->dev, "\tTiler =%016llx\n", - kbdev->pm.backend.desired_tiler_state); - dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_HI), NULL), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_LO), - NULL)); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_HI), NULL), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_LO), NULL)); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_HI), NULL), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_LO), NULL)); - dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_HI), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_LO), NULL)); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_HI), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_LO), NULL)); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_HI), NULL), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_LO), NULL)); -#if KBASE_GPU_RESET_EN - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); -#endif /* KBASE_GPU_RESET_EN */ - } else { - /* Log timelining information that a change in state has - * completed */ - kbase_timeline_pm_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); - } + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); } -KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { @@ -970,18 +1214,15 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) * and unmask them all. 
*/ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, - NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, - NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, - NULL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); } KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); @@ -995,15 +1236,13 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) */ lockdep_assert_held(&kbdev->hwaccess_lock); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, - NULL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, - NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -1017,7 +1256,6 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); - /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -1027,11 +1265,10 @@ KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) { bool reset_required = is_resume; - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; unsigned long flags; KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_mutex); lockdep_assert_held(&kbdev->pm.lock); if (kbdev->pm.backend.gpu_powered) { @@ -1051,12 +1288,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbdev->pm.backend.callback_power_resume(kbdev); return; } else if (kbdev->pm.backend.callback_power_on) { - kbdev->pm.backend.callback_power_on(kbdev); - /* If your platform properly keeps the GPU state you may use the - * return value of the callback_power_on function to - * conditionally reset the GPU on power up. Currently we are - * conservative and always reset the GPU. 
*/ - reset_required = true; + reset_required = kbdev->pm.backend.callback_power_on(kbdev); } spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); @@ -1075,8 +1307,14 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); - /* Lastly, enable the interrupts */ + /* Enable the interrupts */ kbase_pm_enable_interrupts(kbdev); + + /* Turn on the L2 caches */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } KBASE_EXPORT_TEST_API(kbase_pm_clock_on); @@ -1089,7 +1327,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) lockdep_assert_held(&kbdev->pm.lock); /* ASSERT that the cores should now be unavailable. No lock needed. */ - KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); kbdev->poweroff_pending = true; @@ -1219,10 +1457,10 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (!kbdev->hw_quirks_sc) kbdev->hw_quirks_sc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_CONFIG), NULL); + GPU_CONTROL_REG(SHADER_CONFIG)); kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_CONFIG), NULL); + GPU_CONTROL_REG(TILER_CONFIG)); /* Set tiler clock gate override if required */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) @@ -1230,7 +1468,7 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) /* Limit the GPU bus bandwidth if the platform needs this. */ kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); + GPU_CONTROL_REG(L2_MMU_CONFIG)); /* Limit read & write ID width for AXI */ @@ -1297,7 +1535,7 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) u32 coherency_features; coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + GPU_CONTROL_REG(COHERENCY_FEATURES)); /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly * documented for tMIx so force correct value here. @@ -1313,29 +1551,46 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { + int default_idvs_group_size = 0xF; + u32 tmp; + + if (of_property_read_u32(kbdev->dev->of_node, + "idvs-group-size", &tmp)) + tmp = default_idvs_group_size; + + if (tmp > JM_MAX_IDVS_GROUP_SIZE) { + dev_err(kbdev->dev, + "idvs-group-size of %d is too large. 
Maximum value is %d", + tmp, JM_MAX_IDVS_GROUP_SIZE); + tmp = default_idvs_group_size; + } + + kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT; + } + if (!kbdev->hw_quirks_jm) kbdev->hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JM_CONFIG), NULL); + GPU_CONTROL_REG(JM_CONFIG)); -#ifdef CONFIG_MALI_CORESTACK #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) - kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; -#endif /* CONFIG_MALI_CORESTACK */ + if (corestack_driver_control) + kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; } static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) { kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), - kbdev->hw_quirks_sc, NULL); + kbdev->hw_quirks_sc); kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), - kbdev->hw_quirks_tiler, NULL); + kbdev->hw_quirks_tiler); kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), - kbdev->hw_quirks_mmu, NULL); + kbdev->hw_quirks_mmu); kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_jm, NULL); + kbdev->hw_quirks_jm); } @@ -1375,11 +1630,10 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SOFT_RESET, NULL); + GPU_COMMAND_SOFT_RESET); /* Unmask the reset complete interrupt only */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, - NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; @@ -1404,7 +1658,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* No interrupt has been received - check if the RAWSTAT register says * the reset has completed */ - if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & + if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & RESET_COMPLETED) { /* The interrupt is set in the RAWSTAT; this suggests that the * interrupts are not getting to the CPU */ @@ -1420,7 +1674,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) RESET_TIMEOUT); KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_HARD_RESET, NULL); + GPU_COMMAND_HARD_RESET); /* Restart the timer to wait for the hard reset to complete */ rtdata.timed_out = 0; @@ -1451,7 +1705,7 @@ static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) struct kbase_device *kbdev = pdev->data; kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SET_PROTECTED_MODE, NULL); + GPU_COMMAND_SET_PROTECTED_MODE); return 0; } @@ -1473,7 +1727,6 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { unsigned long irq_flags; int err; - bool resume_vinstr = false; KBASE_DEBUG_ASSERT(NULL != kbdev); lockdep_assert_held(&kbdev->pm.lock); @@ -1500,15 +1753,9 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* The cores should be made unavailable due to the reset */ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->shader_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); - if (kbdev->tiler_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, (u32)0u); - kbdev->shader_available_bitmap = 0u; - kbdev->tiler_available_bitmap = 0u; - kbdev->l2_available_bitmap = 0u; + if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + KBASE_TRACE_ADD(kbdev, 
PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ @@ -1519,11 +1766,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) err = kbase_pm_do_reset(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->protected_mode) - resume_vinstr = true; kbdev->protected_mode = false; - kbase_ipa_model_use_configured_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); if (err) @@ -1538,7 +1781,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* Sanity check protected mode was left after reset */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { u32 gpu_status = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS), NULL); + GPU_CONTROL_REG(GPU_STATUS)); WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); } @@ -1547,9 +1790,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) * false when called from kbase_pm_powerup */ if (kbdev->pm.backend.gpu_cycle_counter_requests && (flags & PM_ENABLE_IRQS)) { - /* enable interrupts as the L2 may have to be powered on */ kbase_pm_enable_interrupts(kbdev); - kbase_pm_request_l2_caches(kbdev); /* Re-enable the counters if we need to */ spin_lock_irqsave( @@ -1557,15 +1798,11 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) irq_flags); if (kbdev->pm.backend.gpu_cycle_counter_requests) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START, NULL); + GPU_COMMAND_CYCLE_COUNT_START); spin_unlock_irqrestore( &kbdev->pm.backend.gpu_cycle_counter_requests_lock, irq_flags); - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - kbase_pm_release_l2_caches(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); - kbase_pm_disable_interrupts(kbdev); } @@ -1573,10 +1810,16 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_enable_interrupts(kbdev); exit: - /* If GPU is leaving protected mode resume vinstr operation. */ - if (kbdev->vinstr_ctx && resume_vinstr) - kbase_vinstr_resume(kbdev->vinstr_ctx); - + /* Re-enable GPU hardware counters if we're resetting from protected + * mode. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); return err; } @@ -1590,9 +1833,8 @@ exit: * kbase_pm_request_gpu_cycle_counter() or * kbase_pm_request_gpu_cycle_counter_l2_is_on() only * - * When this function is called the l2 cache must be on and the l2 cache users - * count must have been incremented by a call to ( - * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() ) + * When this function is called the l2 cache must be on - i.e., the GPU must be + * on. 
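+ * (This helper writes the cycle counter start command directly to
+ * GPU_COMMAND, so the register interface must be powered.)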
* * @kbdev: The kbase device structure of the device */ @@ -1608,7 +1850,7 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START, NULL); + GPU_COMMAND_CYCLE_COUNT_START); spin_unlock_irqrestore( &kbdev->pm.backend.gpu_cycle_counter_requests_lock, @@ -1624,8 +1866,6 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); - kbase_pm_request_l2_caches(kbdev); - kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -1640,8 +1880,6 @@ void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); - kbase_pm_request_l2_caches_l2_is_on(kbdev); - kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -1664,13 +1902,11 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_STOP, NULL); + GPU_COMMAND_CYCLE_COUNT_STOP); spin_unlock_irqrestore( &kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); - - kbase_pm_release_l2_caches(kbdev); } void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index c558736f401c..e88b3a836631 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -163,7 +163,7 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); * kbase_pm_disable_interrupts - Disable interrupts on the device. * * This prevents delivery of Power Management interrupts to the CPU so that - * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler + * kbase_pm_update_state() will not be called from the IRQ handler * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. * * Interrupts are also disabled after a call to kbase_pm_clock_off(). @@ -206,58 +206,38 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); */ void kbase_pm_reset_done(struct kbase_device *kbdev); - /** - * kbase_pm_check_transitions_nolock - Check if there are any power transitions - * to make, and if so start them. - * - * This function will check the desired_xx_state members of - * struct kbase_pm_device_data and the actual status of the hardware to see if - * any power transitions can be made at this time to make the hardware state - * closer to the state desired by the power policy. + * kbase_pm_wait_for_desired_state - Wait for the desired power state to be + * reached * - * The return value can be used to check whether all the desired cores are - * available, and so whether it's worth submitting a job (e.g. from a Power - * Management IRQ). + * Wait for the L2 and shader power state machines to reach the states + * corresponding to the values of 'l2_desired' and 'shaders_desired'. * - * Note that this still returns true when desired_xx_state has no - * cores. That is: of the no cores desired, none were *un*available. In - * this case, the caller may still need to try submitting jobs. 
This is because - * the Core Availability Policy might have taken us to an intermediate state - * where no cores are powered, before powering on more cores (e.g. for core - * rotation) + * The usual use-case for this is to ensure cores are 'READY' after performing + * a GPU Reset. * - * The caller must hold kbase_device.pm.power_change_lock + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. * * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Return: non-zero when all desired cores are available. That is, - * it's worthwhile for the caller to submit a job. - * false otherwise */ -bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev); +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); /** - * kbase_pm_check_transitions_sync - Synchronous and locking variant of - * kbase_pm_check_transitions_nolock() + * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * - * On returning, the desired state at the time of the call will have been met. + * Wait for the L2 to be powered on, and for the L2 and shader state machines to + * stabilise by reaching the states corresponding to the values of 'l2_desired' + * and 'shaders_desired'. * - * There is nothing to stop the core being switched off by calls to - * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the - * caller must have already made a call to - * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously. + * kbdev->pm.active_count must be non-zero when calling this function. * - * The usual use-case for this is to ensure cores are 'READY' after performing - * a GPU Reset. - * - * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold - * kbase_device.pm.power_change_lock, because this function will take that - * lock itself. + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. * * @kbdev: The kbase device structure for the device (must be a valid pointer) */ -void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); /** * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() @@ -268,6 +248,25 @@ void kbase_pm_check_transitions_sync(struct kbase_device *kbdev); */ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); +/** + * kbase_pm_update_state - Update the L2 and shader power state machines + * @kbdev: Device pointer + */ +void kbase_pm_update_state(struct kbase_device *kbdev); + +/** + * kbase_pm_state_machine_init - Initialize the state machines, primarily the + * shader poweroff timer + * @kbdev: Device pointer + */ +int kbase_pm_state_machine_init(struct kbase_device *kbdev); + +/** + * kbase_pm_state_machine_term - Clean up the PM state machines' data + * @kbdev: Device pointer + */ +void kbase_pm_state_machine_term(struct kbase_device *kbdev); + /** * kbase_pm_update_cores_state - Update the desired state of shader cores from * the Power Policy, and begin any power @@ -282,24 +281,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); */ void kbase_pm_update_cores_state(struct kbase_device *kbdev); -/** - * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off - * the GPU and/or shader cores. - * - * This should be called by any functions which directly power off the GPU. 
- * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev); - -/** - * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required - * and used cores. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev); - /** * kbasep_pm_metrics_init - Initialize the metrics gathering framework. * @@ -565,4 +546,79 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); */ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); +#ifdef CONFIG_MALI_DEVFREQ +/** + * kbase_devfreq_set_core_mask - Set devfreq core mask + * @kbdev: Device pointer + * @core_mask: New core mask + * + * This function is used by devfreq to change the available core mask as + * required by Dynamic Core Scaling. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); +#endif + +/** + * kbase_pm_reset_start_locked - Signal that GPU reset has started + * @kbdev: Device pointer + * + * Normal power management operation will be suspended until the reset has + * completed. + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_reset_start_locked(struct kbase_device *kbdev); + +/** + * kbase_pm_reset_complete - Signal that GPU reset has completed + * @kbdev: Device pointer + * + * Normal power management operation will be resumed. The power manager will + * re-evaluate what cores are needed and power on or off as required. + */ +void kbase_pm_reset_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_enable - Enable the protected mode override + * @kbdev: Device pointer + * + * When the protected mode override is enabled, all shader cores are requested + * to power down, and the L2 power state can be controlled by + * kbase_pm_protected_l2_override(). + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_disable - Disable the protected mode override + * @kbdev: Device pointer + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_disable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_l2_override - Control the protected mode L2 override + * @kbdev: Device pointer + * @override: true to enable the override, false to disable + * + * When the driver is transitioning in or out of protected mode, the L2 cache is + * forced to power off. This can be overridden to force the L2 cache to power + * on. This is required to change coherency settings on some GPUs. + */ +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); + +/* If true, the driver should explicitly control corestack power management, + * instead of relying on the Power Domain Controller. + */ +extern bool corestack_driver_control; + +/* If true, disable powering-down of individual cores, and just power-down at + * the top-level using platform-specific code. + * If false, use the expected behaviour of controlling the individual cores + * from within the driver. 
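+ *
+ * When true, the shader/corestack state machine skips the explicit
+ * ACTION_PWROFF invocations (power down is left to platform code) and
+ * policies flagged with
+ * KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY are filtered out
+ * of the policy list.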
+ */ +extern bool platform_power_down_only; + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index 1d771e6d0f7b..2f06a0a4b247 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -30,285 +30,51 @@ #include #include -static const struct kbase_pm_policy *const policy_list[] = { +static const struct kbase_pm_policy *const all_policy_list[] = { #ifdef CONFIG_MALI_NO_MALI &kbase_pm_always_on_policy_ops, - &kbase_pm_demand_policy_ops, &kbase_pm_coarse_demand_policy_ops, #if !MALI_CUSTOMER_RELEASE - &kbase_pm_demand_always_powered_policy_ops, - &kbase_pm_fast_start_policy_ops, + &kbase_pm_always_on_demand_policy_ops, #endif #else /* CONFIG_MALI_NO_MALI */ -#if !PLATFORM_POWER_DOWN_ONLY - &kbase_pm_demand_policy_ops, -#endif /* !PLATFORM_POWER_DOWN_ONLY */ &kbase_pm_coarse_demand_policy_ops, - &kbase_pm_always_on_policy_ops, #if !MALI_CUSTOMER_RELEASE -#if !PLATFORM_POWER_DOWN_ONLY - &kbase_pm_demand_always_powered_policy_ops, - &kbase_pm_fast_start_policy_ops, -#endif /* !PLATFORM_POWER_DOWN_ONLY */ + &kbase_pm_always_on_demand_policy_ops, #endif + &kbase_pm_always_on_policy_ops #endif /* CONFIG_MALI_NO_MALI */ }; -/* The number of policies available in the system. - * This is derived from the number of functions listed in policy_get_functions. - */ -#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list)) - - -/* Function IDs for looking up Timeline Trace codes in - * kbase_pm_change_state_trace_code */ -enum kbase_pm_func_id { - KBASE_PM_FUNC_ID_REQUEST_CORES_START, - KBASE_PM_FUNC_ID_REQUEST_CORES_END, - KBASE_PM_FUNC_ID_RELEASE_CORES_START, - KBASE_PM_FUNC_ID_RELEASE_CORES_END, - /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither - * expect to hit it nor tend to hit it very much anyway. We can detect - * whether we need more instrumentation by a difference between - * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. 
*/ - - /* Must be the last */ - KBASE_PM_FUNC_ID_COUNT -}; - - -/* State changes during request/unrequest/release-ing cores */ -enum { - KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), - KBASE_PM_CHANGE_STATE_TILER = (1u << 1), - - /* These two must be last */ - KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | - KBASE_PM_CHANGE_STATE_SHADER), - KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 -}; -typedef u32 kbase_pm_change_state; - - -#ifdef CONFIG_MALI_TRACE_TIMELINE -/* Timeline Trace code lookups for each function */ -static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT] - [KBASE_PM_CHANGE_STATE_COUNT] = { - /* kbase_pm_request_cores */ - [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, - [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = - SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, - [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, - [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | - KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, - - [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, - [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = - SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, - [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, - [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | - KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, - - /* kbase_pm_release_cores */ - [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, - [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, - [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, - [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | - KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, - - [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, - [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, - [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, - [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | - KBASE_PM_CHANGE_STATE_TILER] = - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END -}; - -static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, - enum kbase_pm_func_id func_id, - kbase_pm_change_state state) -{ - int trace_code; - - KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); - KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == - state); - - trace_code = kbase_pm_change_state_trace_code[func_id][state]; - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); -} - -#else /* CONFIG_MALI_TRACE_TIMELINE */ -static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, - enum kbase_pm_func_id func_id, kbase_pm_change_state state) -{ -} - -#endif /* CONFIG_MALI_TRACE_TIMELINE */ - -/** - * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any - * requested shader cores - * @kbdev: Device pointer +/* A filtered list of policies available in the system, calculated by filtering + * all_policy_list based on the flags provided by each policy. 
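+ * It is populated once, by generate_filtered_policy_list(), when
+ * kbase_pm_policy_init() runs; only the first enabled_policy_count
+ * entries are valid.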
*/ -static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) -{ - u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; - u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->pm.backend.desired_shader_state &= - ~kbdev->pm.backend.shader_poweroff_pending; - kbdev->pm.backend.desired_tiler_state &= - ~kbdev->pm.backend.tiler_poweroff_pending; - - kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.tiler_poweroff_pending = 0; - - if (prev_shader_state != kbdev->pm.backend.desired_shader_state || - prev_tiler_state != - kbdev->pm.backend.desired_tiler_state || - kbdev->pm.backend.ca_in_transition) { - bool cores_are_available; - - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START); - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - KBASE_TIMELINE_PM_CHECKTRANS(kbdev, - SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END); - - /* Don't need 'cores_are_available', - * because we don't return anything */ - CSTD_UNUSED(cores_are_available); - } -} - -static enum hrtimer_restart -kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) -{ - struct kbase_device *kbdev; - unsigned long flags; - - kbdev = container_of(timer, struct kbase_device, - pm.backend.gpu_poweroff_timer); +static const struct kbase_pm_policy *enabled_policy_list[ARRAY_SIZE(all_policy_list)]; +static size_t enabled_policy_count; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* It is safe for this call to do nothing if the work item is already - * queued. The worker function will read the must up-to-date state of - * kbdev->pm.backend.gpu_poweroff_pending under lock. - * - * If a state change occurs while the worker function is processing, - * this call will succeed as a work item can be requeued once it has - * started processing. 
- */ - if (kbdev->pm.backend.gpu_poweroff_pending) - queue_work(kbdev->pm.backend.gpu_poweroff_wq, - &kbdev->pm.backend.gpu_poweroff_work); - - if (kbdev->pm.backend.shader_poweroff_pending || - kbdev->pm.backend.tiler_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending_time--; - - KBASE_DEBUG_ASSERT( - kbdev->pm.backend.shader_poweroff_pending_time - >= 0); - - if (!kbdev->pm.backend.shader_poweroff_pending_time) - kbasep_pm_do_poweroff_cores(kbdev); - } - - if (kbdev->pm.backend.poweroff_timer_needed) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); - - return HRTIMER_RESTART; - } - - kbdev->pm.backend.poweroff_timer_running = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return HRTIMER_NORESTART; -} - -static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) +static void generate_filtered_policy_list(void) { - unsigned long flags; - struct kbase_device *kbdev; - bool do_poweroff = false; - - kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_poweroff_work); + size_t i; - mutex_lock(&kbdev->pm.lock); - - if (kbdev->pm.backend.gpu_poweroff_pending == 0) { - mutex_unlock(&kbdev->pm.lock); - return; - } - - kbdev->pm.backend.gpu_poweroff_pending--; - - if (kbdev->pm.backend.gpu_poweroff_pending > 0) { - mutex_unlock(&kbdev->pm.lock); - return; - } - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Only power off the GPU if a request is still pending */ - if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev)) - do_poweroff = true; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) { + const struct kbase_pm_policy *pol = all_policy_list[i]; - if (do_poweroff) { - kbdev->pm.backend.poweroff_timer_needed = false; - hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - kbdev->pm.backend.poweroff_timer_running = false; + if (platform_power_down_only && + (pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY)) + continue; - /* Power off the GPU */ - kbase_pm_do_poweroff(kbdev, false); + enabled_policy_list[enabled_policy_count++] = pol; } - - mutex_unlock(&kbdev->pm.lock); } int kbase_pm_policy_init(struct kbase_device *kbdev) { - struct workqueue_struct *wq; - - wq = alloc_workqueue("kbase_pm_do_poweroff", - WQ_HIGHPRI | WQ_UNBOUND, 1); - if (!wq) - return -ENOMEM; - - kbdev->pm.backend.gpu_poweroff_wq = wq; - INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work, - kbasep_pm_do_gpu_poweroff_wq); - hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - kbdev->pm.backend.gpu_poweroff_timer.function = - kbasep_pm_do_gpu_poweroff_callback; - kbdev->pm.backend.pm_current_policy = policy_list[0]; + generate_filtered_policy_list(); + if (enabled_policy_count == 0) + return -EINVAL; + + kbdev->pm.backend.pm_current_policy = enabled_policy_list[0]; kbdev->pm.backend.pm_current_policy->init(kbdev); - kbdev->pm.gpu_poweroff_time = - HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); - kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; - kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; return 0; } @@ -316,29 +82,6 @@ int kbase_pm_policy_init(struct kbase_device *kbdev) void kbase_pm_policy_term(struct kbase_device *kbdev) { kbdev->pm.backend.pm_current_policy->term(kbdev); - destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); -} - -void 
kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) -{ - unsigned long flags; - - lockdep_assert_held(&kbdev->pm.lock); - - kbdev->pm.backend.poweroff_timer_needed = false; - hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.poweroff_timer_running = false; - - /* If wq is already running but is held off by pm.lock, make sure it has - * no effect */ - kbdev->pm.backend.gpu_poweroff_pending = 0; - - kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.tiler_poweroff_pending = 0; - kbdev->pm.backend.shader_poweroff_pending_time = 0; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_pm_update_active(struct kbase_device *kbdev) @@ -356,37 +99,29 @@ void kbase_pm_update_active(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); active = backend->pm_current_policy->get_core_active(kbdev); + WARN((kbase_pm_is_active(kbdev) && !active), + "GPU is active but policy '%s' is indicating that it can be powered off", + kbdev->pm.backend.pm_current_policy->name); if (active) { - if (backend->gpu_poweroff_pending) { - /* Cancel any pending power off request */ - backend->gpu_poweroff_pending = 0; - - /* If a request was pending then the GPU was still - * powered, so no need to continue */ - if (!kbdev->poweroff_pending) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - return; - } - } - - if (!backend->poweroff_timer_running && !backend->gpu_powered && - (pm->poweroff_gpu_ticks || - pm->poweroff_shader_ticks)) { - backend->poweroff_timer_needed = true; - backend->poweroff_timer_running = true; - hrtimer_start(&backend->gpu_poweroff_timer, - pm->gpu_poweroff_time, - HRTIMER_MODE_REL); - } - /* Power on the GPU and any cores requested by the policy */ - if (pm->backend.poweroff_wait_in_progress) { + if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && + pm->backend.poweroff_wait_in_progress) { KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); pm->backend.poweron_required = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } else { + /* Cancel the the invocation of + * kbase_pm_gpu_poweroff_wait_wq() from the L2 state + * machine. This is safe - it + * invoke_poweroff_wait_wq_when_l2_off is true, then + * the poweroff work hasn't even been queued yet, + * meaning we can go straight to powering on. + */ + pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; + pm->backend.poweroff_wait_in_progress = false; + pm->backend.l2_desired = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_do_poweron(kbdev, false); } @@ -395,41 +130,12 @@ void kbase_pm_update_active(struct kbase_device *kbdev) * when there are contexts active */ KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (backend->shader_poweroff_pending || - backend->tiler_poweroff_pending) { - backend->shader_poweroff_pending = 0; - backend->tiler_poweroff_pending = 0; - backend->shader_poweroff_pending_time = 0; - } - /* Request power off */ if (pm->backend.gpu_powered) { - if (pm->poweroff_gpu_ticks) { - backend->gpu_poweroff_pending = - pm->poweroff_gpu_ticks; - backend->poweroff_timer_needed = true; - if (!backend->poweroff_timer_running) { - /* Start timer if not running (eg if - * power policy has been changed from - * always_on to something else). 
This - * will ensure the GPU is actually - * powered off */ - backend->poweroff_timer_running - = true; - hrtimer_start( - &backend->gpu_poweroff_timer, - pm->gpu_poweroff_time, - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - - /* Power off the GPU immediately */ - kbase_pm_do_poweroff(kbdev, false); - } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Power off the GPU immediately */ + kbase_pm_do_poweroff(kbdev, false); } else { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -438,10 +144,7 @@ void kbase_pm_update_active(struct kbase_device *kbdev) void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) { - u64 desired_bitmap; - u64 desired_tiler_bitmap; - bool cores_are_available; - bool do_poweroff = false; + bool shaders_desired; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -450,116 +153,20 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) if (kbdev->pm.backend.poweroff_wait_in_progress) return; - if (kbdev->protected_mode_transition && !kbdev->shader_needed_bitmap && - !kbdev->shader_inuse_bitmap && !kbdev->tiler_needed_cnt - && !kbdev->tiler_inuse_cnt) { + if (kbdev->pm.backend.protected_transition_override) /* We are trying to change in/out of protected mode - force all * cores off so that the L2 powers down */ - desired_bitmap = 0; - desired_tiler_bitmap = 0; - } else { - desired_bitmap = - kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); - desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); - - if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_tiler_bitmap = 1; - else - desired_tiler_bitmap = 0; - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) { - /* Unless XAFFINITY is supported, enable core 0 if tiler - * required, regardless of core availability */ - if (kbdev->tiler_needed_cnt > 0 || - kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; - } - } + shaders_desired = false; + else + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); - if (kbdev->pm.backend.desired_shader_state != desired_bitmap) + if (kbdev->pm.backend.shaders_desired != shaders_desired) { KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, - (u32)desired_bitmap); - /* Are any cores being powered on? 
*/ - if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || - ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || - kbdev->pm.backend.ca_in_transition) { - /* Check if we are powering off any cores before updating shader - * state */ - if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || - kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap) { - /* Start timer to power off cores */ - kbdev->pm.backend.shader_poweroff_pending |= - (kbdev->pm.backend.desired_shader_state & - ~desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending |= - (kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap); - - if (kbdev->pm.poweroff_shader_ticks && - !kbdev->protected_mode_transition) - kbdev->pm.backend.shader_poweroff_pending_time = - kbdev->pm.poweroff_shader_ticks; - else - do_poweroff = true; - } - - kbdev->pm.backend.desired_shader_state = desired_bitmap; - kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; - - /* If any cores are being powered on, transition immediately */ - cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || - kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap) { - /* Start timer to power off cores */ - kbdev->pm.backend.shader_poweroff_pending |= - (kbdev->pm.backend.desired_shader_state & - ~desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending |= - (kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap); - if (kbdev->pm.poweroff_shader_ticks && - !kbdev->protected_mode_transition) - kbdev->pm.backend.shader_poweroff_pending_time = - kbdev->pm.poweroff_shader_ticks; - else - kbasep_pm_do_poweroff_cores(kbdev); - } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && - desired_tiler_bitmap != 0 && - kbdev->pm.backend.poweroff_timer_needed) { - /* If power policy is keeping cores on despite there being no - * active contexts then disable poweroff timer as it isn't - * required. 
- * Only reset poweroff_timer_needed if we're not in the middle - * of the power off callback */ - kbdev->pm.backend.poweroff_timer_needed = false; - } + (u32)kbdev->pm.backend.shaders_desired); - /* Ensure timer does not power off wanted cores and make sure to power - * off unwanted cores */ - if (kbdev->pm.backend.shader_poweroff_pending || - kbdev->pm.backend.tiler_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending &= - ~(kbdev->pm.backend.desired_shader_state & - desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending &= - ~(kbdev->pm.backend.desired_tiler_state & - desired_tiler_bitmap); - - if (!kbdev->pm.backend.shader_poweroff_pending && - !kbdev->pm.backend.tiler_poweroff_pending) - kbdev->pm.backend.shader_poweroff_pending_time = 0; + kbdev->pm.backend.shaders_desired = shaders_desired; + kbase_pm_update_state(kbdev); } - - /* Shader poweroff is deferred to the end of the function, to eliminate - * issues caused by the core availability policy recursing into this - * function */ - if (do_poweroff) - kbasep_pm_do_poweroff_cores(kbdev); - - /* Don't need 'cores_are_available', because we don't return anything */ - CSTD_UNUSED(cores_are_available); } void kbase_pm_update_cores_state(struct kbase_device *kbdev) @@ -575,12 +182,11 @@ void kbase_pm_update_cores_state(struct kbase_device *kbdev) int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) { - if (!list) - return POLICY_COUNT; - - *list = policy_list; + WARN_ON(enabled_policy_count == 0); + if (list) + *list = enabled_policy_list; - return POLICY_COUNT; + return enabled_policy_count; } KBASE_EXPORT_TEST_API(kbase_pm_list_policies); @@ -649,336 +255,3 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, } KBASE_EXPORT_TEST_API(kbase_pm_set_policy); - -/* Check whether a state change has finished, and trace it as completed */ -static void -kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) -{ - if ((kbdev->shader_available_bitmap & - kbdev->pm.backend.desired_shader_state) - == kbdev->pm.backend.desired_shader_state && - (kbdev->tiler_available_bitmap & - kbdev->pm.backend.desired_tiler_state) - == kbdev->pm.backend.desired_tiler_state) - kbase_timeline_pm_check_handle_event(kbdev, - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); -} - -void kbase_pm_request_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores) -{ - u64 cores; - - kbase_pm_change_state change_gpu_state = 0u; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - cores = shader_cores; - while (cores) { - int bitnum = fls64(cores) - 1; - u64 bit = 1ULL << bitnum; - - /* It should be almost impossible for this to overflow. It would - * require 2^32 atoms to request a particular core, which would - * require 2^24 contexts to submit. This would require an amount - * of memory that is impossible on a 32-bit system and extremely - * unlikely on a 64-bit system. 
*/ - int cnt = ++kbdev->shader_needed_cnt[bitnum]; - - if (1 == cnt) { - kbdev->shader_needed_bitmap |= bit; - change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; - } - - cores &= ~bit; - } - - if (tiler_required) { - int cnt = ++kbdev->tiler_needed_cnt; - - if (1 == cnt) - change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; - - KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); - } - - if (change_gpu_state) { - KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, - NULL, 0u, (u32) kbdev->shader_needed_bitmap); - - kbase_timeline_pm_cores_func(kbdev, - KBASE_PM_FUNC_ID_REQUEST_CORES_START, - change_gpu_state); - kbase_pm_update_cores_state_nolock(kbdev); - kbase_timeline_pm_cores_func(kbdev, - KBASE_PM_FUNC_ID_REQUEST_CORES_END, - change_gpu_state); - } -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_cores); - -void kbase_pm_unrequest_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores) -{ - kbase_pm_change_state change_gpu_state = 0u; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - while (shader_cores) { - int bitnum = fls64(shader_cores) - 1; - u64 bit = 1ULL << bitnum; - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); - - cnt = --kbdev->shader_needed_cnt[bitnum]; - - if (0 == cnt) { - kbdev->shader_needed_bitmap &= ~bit; - - change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; - } - - shader_cores &= ~bit; - } - - if (tiler_required) { - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); - - cnt = --kbdev->tiler_needed_cnt; - - if (0 == cnt) - change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; - } - - if (change_gpu_state) { - KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, - NULL, 0u, (u32) kbdev->shader_needed_bitmap); - - kbase_pm_update_cores_state_nolock(kbdev); - - /* Trace that any state change effectively completes immediately - * - no-one will wait on the state change */ - kbase_pm_trace_check_and_finish_state_change(kbdev); - } -} - -KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores); - -enum kbase_pm_cores_ready -kbase_pm_register_inuse_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores) -{ - u64 prev_shader_needed; /* Just for tracing */ - u64 prev_shader_inuse; /* Just for tracing */ - - lockdep_assert_held(&kbdev->hwaccess_lock); - - prev_shader_needed = kbdev->shader_needed_bitmap; - prev_shader_inuse = kbdev->shader_inuse_bitmap; - - /* If desired_shader_state does not contain the requested cores, then - * power management is not attempting to powering those cores (most - * likely due to core availability policy) and a new job affinity must - * be chosen */ - if ((kbdev->pm.backend.desired_shader_state & shader_cores) != - shader_cores) { - return (kbdev->pm.backend.poweroff_wait_in_progress || - kbdev->pm.backend.pm_current_policy == NULL) ? 
- KBASE_CORES_NOT_READY : KBASE_NEW_AFFINITY; - } - - if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores || - (tiler_required && !kbdev->tiler_available_bitmap)) { - /* Trace ongoing core transition */ - kbase_timeline_pm_l2_transition_start(kbdev); - return KBASE_CORES_NOT_READY; - } - - /* If we started to trace a state change, then trace it has being - * finished by now, at the very latest */ - kbase_pm_trace_check_and_finish_state_change(kbdev); - /* Trace core transition done */ - kbase_timeline_pm_l2_transition_done(kbdev); - - while (shader_cores) { - int bitnum = fls64(shader_cores) - 1; - u64 bit = 1ULL << bitnum; - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); - - cnt = --kbdev->shader_needed_cnt[bitnum]; - - if (0 == cnt) - kbdev->shader_needed_bitmap &= ~bit; - - /* shader_inuse_cnt should not overflow because there can only - * be a very limited number of jobs on the h/w at one time */ - - kbdev->shader_inuse_cnt[bitnum]++; - kbdev->shader_inuse_bitmap |= bit; - - shader_cores &= ~bit; - } - - if (tiler_required) { - KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); - - --kbdev->tiler_needed_cnt; - - kbdev->tiler_inuse_cnt++; - - KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0); - } - - if (prev_shader_needed != kbdev->shader_needed_bitmap) - KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL, - NULL, 0u, (u32) kbdev->shader_needed_bitmap); - - if (prev_shader_inuse != kbdev->shader_inuse_bitmap) - KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, - NULL, 0u, (u32) kbdev->shader_inuse_bitmap); - - return KBASE_CORES_READY; -} - -KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores); - -void kbase_pm_release_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores) -{ - kbase_pm_change_state change_gpu_state = 0u; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - while (shader_cores) { - int bitnum = fls64(shader_cores) - 1; - u64 bit = 1ULL << bitnum; - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0); - - cnt = --kbdev->shader_inuse_cnt[bitnum]; - - if (0 == cnt) { - kbdev->shader_inuse_bitmap &= ~bit; - change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; - } - - shader_cores &= ~bit; - } - - if (tiler_required) { - int cnt; - - KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0); - - cnt = --kbdev->tiler_inuse_cnt; - - if (0 == cnt) - change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; - } - - if (change_gpu_state) { - KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, - NULL, 0u, (u32) kbdev->shader_inuse_bitmap); - - kbase_timeline_pm_cores_func(kbdev, - KBASE_PM_FUNC_ID_RELEASE_CORES_START, - change_gpu_state); - kbase_pm_update_cores_state_nolock(kbdev); - kbase_timeline_pm_cores_func(kbdev, - KBASE_PM_FUNC_ID_RELEASE_CORES_END, - change_gpu_state); - - /* Trace that any state change completed immediately */ - kbase_pm_trace_check_and_finish_state_change(kbdev); - } -} - -KBASE_EXPORT_TEST_API(kbase_pm_release_cores); - -void kbase_pm_request_cores_sync(struct kbase_device *kbdev, - bool tiler_required, - u64 shader_cores) -{ - unsigned long flags; - - kbase_pm_wait_for_poweroff_complete(kbdev); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_request_cores(kbdev, tiler_required, shader_cores); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - kbase_pm_check_transitions_sync(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); - -void kbase_pm_request_l2_caches(struct kbase_device *kbdev) -{ - 
unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbdev->l2_users_count++; - - KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); - - /* Check for the required L2 transitions. - * Caller would block here for the L2 caches of all core groups to be - * powered on, so need to inform the Hw to power up all the L2 caches. - * Can't rely on the l2_users_count value being non-zero previously to - * avoid checking for the transition, as the count could be non-zero - * even if not all the instances of L2 cache are powered up since - * currently the power status of L2 is not tracked separately for each - * core group. Also if the GPU is reset while the L2 is on, L2 will be - * off but the count will be non-zero. - */ - kbase_pm_check_transitions_nolock(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - wait_event(kbdev->pm.backend.l2_powered_wait, - kbdev->pm.backend.l2_powered == 1); - - /* Trace that any state change completed immediately */ - kbase_pm_trace_check_and_finish_state_change(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); - -void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->l2_users_count++; -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); - -void kbase_pm_release_l2_caches(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); - - --kbdev->l2_users_count; - - if (!kbdev->l2_users_count) { - kbase_pm_check_transitions_nolock(kbdev); - /* Trace that any state change completed immediately */ - kbase_pm_trace_check_and_finish_state_change(kbdev); - } -} - -KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h index 852fedd346ba..28d258fce359 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -64,169 +64,44 @@ void kbase_pm_update_active(struct kbase_device *kbdev); */ void kbase_pm_update_cores(struct kbase_device *kbdev); - -enum kbase_pm_cores_ready { - KBASE_CORES_NOT_READY = 0, - KBASE_NEW_AFFINITY = 1, - KBASE_CORES_READY = 2 -}; - - -/** - * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores() - * - * @kbdev: The kbase device structure for the device - * @tiler_required: true if the tiler is required, false otherwise - * @shader_cores: A bitmask of shader cores which are necessary for the job - * - * When this function returns, the @shader_cores will be in the READY state. - * - * This is safe variant of kbase_pm_check_transitions_sync(): it handles the - * work of ensuring the requested cores will remain powered until a matching - * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate) - * is made. 
- */ -void kbase_pm_request_cores_sync(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores); - /** - * kbase_pm_request_cores - Mark one or more cores as being required - * for jobs to be submitted - * - * @kbdev: The kbase device structure for the device - * @tiler_required: true if the tiler is required, false otherwise - * @shader_cores: A bitmask of shader cores which are necessary for the job - * - * This function is called by the job scheduler to mark one or more cores as - * being required to submit jobs that are ready to run. - * - * The cores requested are reference counted and a subsequent call to - * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be - * made to dereference the cores as being 'needed'. - * - * The active power policy will meet or exceed the requirements of the - * requested cores in the system. Any core transitions needed will be begun - * immediately, but they might not complete/the cores might not be available - * until a Power Management IRQ. - * - * Return: 0 if the cores were successfully requested, or -errno otherwise. - */ -void kbase_pm_request_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores); - -/** - * kbase_pm_unrequest_cores - Unmark one or more cores as being required for - * jobs to be submitted. - * - * @kbdev: The kbase device structure for the device - * @tiler_required: true if the tiler is required, false otherwise - * @shader_cores: A bitmask of shader cores (as given to - * kbase_pm_request_cores() ) - * - * This function undoes the effect of kbase_pm_request_cores(). It should be - * used when a job is not going to be submitted to the hardware (e.g. the job is - * cancelled before it is enqueued). - * - * The active power policy will meet or exceed the requirements of the - * requested cores in the system. Any core transitions needed will be begun - * immediately, but they might not complete until a Power Management IRQ. - * - * The policy may use this as an indication that it can power down cores. - */ -void kbase_pm_unrequest_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores); - -/** - * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job - * - * @kbdev: The kbase device structure for the device - * @tiler_required: true if the tiler is required, false otherwise - * @shader_cores: A bitmask of shader cores (as given to - * kbase_pm_request_cores() ) - * - * This function should be called after kbase_pm_request_cores() when the job - * is about to be submitted to the hardware. It will check that the necessary - * cores are available and if so update the 'needed' and 'inuse' bitmasks to - * reflect that the job is now committed to being run. - * - * If the necessary cores are not currently available then the function will - * return %KBASE_CORES_NOT_READY and have no effect. + * kbase_pm_cores_requested - Check that a power request has been locked into + * the HW. 
+ * @kbdev: Kbase device + * @shader_required: true if shaders are required * - * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready, - * - * %KBASE_NEW_AFFINITY if the affinity requested is not allowed, - * - * %KBASE_CORES_READY if the cores requested are already available - */ -enum kbase_pm_cores_ready kbase_pm_register_inuse_cores( - struct kbase_device *kbdev, - bool tiler_required, - u64 shader_cores); - -/** - * kbase_pm_release_cores - Release cores after a job has run - * - * @kbdev: The kbase device structure for the device - * @tiler_required: true if the tiler is required, false otherwise - * @shader_cores: A bitmask of shader cores (as given to - * kbase_pm_register_inuse_cores() ) - * - * This function should be called when a job has finished running on the - * hardware. A call to kbase_pm_register_inuse_cores() must have previously - * occurred. The reference counts of the specified cores will be decremented - * which may cause the bitmask of 'inuse' cores to be reduced. The power policy - * may then turn off any cores which are no longer 'inuse'. - */ -void kbase_pm_release_cores(struct kbase_device *kbdev, - bool tiler_required, u64 shader_cores); - -/** - * kbase_pm_request_l2_caches - Request l2 caches - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Request the use of l2 caches for all core groups, power up, wait and prevent - * the power manager from powering down the l2 caches. - * - * This tells the power management that the caches should be powered up, and - * they should remain powered, irrespective of the usage of shader cores. This - * does not return until the l2 caches are powered up. - * - * The caller must call kbase_pm_release_l2_caches() when they are finished - * to allow normal power management of the l2 caches to resume. - * - * This should only be used when power management is active. - */ -void kbase_pm_request_l2_caches(struct kbase_device *kbdev); - -/** - * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Increment the count of l2 users but do not attempt to power on the l2 - * - * It is the callers responsibility to ensure that the l2 is already powered up - * and to eventually call kbase_pm_release_l2_caches() - */ -void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev); - -/** - * kbase_pm_request_l2_caches - Release l2 caches - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) + * Called by the scheduler to check if a power on request has been locked into + * the HW. * - * Release the use of l2 caches for all core groups and allow the power manager - * to power them down when necessary. + * Note that there is no guarantee that the cores are actually ready, however + * when the request has been locked into the HW, then it is safe to submit work + * since the HW will wait for the transition to ready. * - * This tells the power management that the caches can be powered down if - * necessary, with respect to the usage of shader cores. + * A reference must first be taken prior to making this call. * - * The caller must have called kbase_pm_request_l2_caches() prior to a call - * to this. + * Caller must hold the hwaccess_lock. * - * This should only be used when power management is active. + * Return: true if the request to the HW was successfully made else false if the + * request is still pending. 
*/ -void kbase_pm_release_l2_caches(struct kbase_device *kbdev); +static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, + bool shader_required) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* If the L2 & tiler are not on or pending, then the tiler is not yet + * available, and shaders are definitely not powered. + */ + if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON) + return false; + + if (shader_required && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + return false; + + return true; +} #endif /* _KBASE_PM_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c index cef07455cbae..5e1b761cf43c 100755 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,24 +35,20 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, /* Read hi, lo, hi to ensure that overflow from lo to hi is handled * correctly */ do { - hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), - NULL); + hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI)); *cycle_counter = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); - hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), - NULL); + GPU_CONTROL_REG(CYCLE_COUNT_LO)); + hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI)); *cycle_counter |= (((u64) hi1) << 32); } while (hi1 != hi2); /* Read hi, lo, hi to ensure that overflow from lo to hi is handled * correctly */ do { - hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), - NULL); + hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI)); *system_time = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_LO), NULL); - hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), - NULL); + GPU_CONTROL_REG(TIMESTAMP_LO)); + hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI)); *system_time |= (((u64) hi1) << 32); } while (hi1 != hi2); @@ -64,7 +60,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, /** * kbase_wait_write_flush - Wait for GPU write flush - * @kctx: Context pointer + * @kbdev: Kbase device * * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush * its write buffer. @@ -75,7 +71,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, * not be as expected. */ #ifndef CONFIG_MALI_NO_MALI -void kbase_wait_write_flush(struct kbase_context *kctx) +void kbase_wait_write_flush(struct kbase_device *kbdev) { u32 base_count = 0; @@ -83,14 +79,14 @@ void kbase_wait_write_flush(struct kbase_context *kctx) * The caller must be holding onto the kctx or the call is from * userspace. 
*/ - kbase_pm_context_active(kctx->kbdev); - kbase_pm_request_gpu_cycle_counter(kctx->kbdev); + kbase_pm_context_active(kbdev); + kbase_pm_request_gpu_cycle_counter(kbdev); while (true) { u32 new_count; - new_count = kbase_reg_read(kctx->kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); + new_count = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_LO)); /* First time around, just store the count. */ if (base_count == 0) { base_count = new_count; @@ -102,7 +98,7 @@ void kbase_wait_write_flush(struct kbase_context *kctx) break; } - kbase_pm_release_gpu_cycle_counter(kctx->kbdev); - kbase_pm_context_idle(kctx->kbdev); + kbase_pm_release_gpu_cycle_counter(kbdev); + kbase_pm_context_idle(kbdev); } #endif /* CONFIG_MALI_NO_MALI */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h deleted file mode 100755 index e1bd2632b2d6..000000000000 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_BACKEND_TIME_H_ -#define _KBASE_BACKEND_TIME_H_ - -/** - * kbase_backend_get_gpu_time() - Get current GPU time - * @kbdev: Device pointer - * @cycle_counter: Pointer to u64 to store cycle counter in - * @system_time: Pointer to u64 to store system time in - * @ts: Pointer to struct timespec to store current monotonic - * time in - */ -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec *ts); - -/** - * kbase_wait_write_flush() - Wait for GPU write flush - * @kctx: Context pointer - * - * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush - * its write buffer. - * - * If GPU resets occur then the counters are reset to zero, the delay may not be - * as expected. 
- * - * This function is only in use for BASE_HW_ISSUE_6367 - */ -#ifdef CONFIG_MALI_NO_MALI -static inline void kbase_wait_write_flush(struct kbase_context *kctx) -{ -} -#else -void kbase_wait_write_flush(struct kbase_context *kctx); -#endif - -#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/drivers/gpu/arm/midgard/build.bp b/drivers/gpu/arm/midgard/build.bp index afc39ff61afb..2cf685c0eb66 100755 --- a/drivers/gpu/arm/midgard/build.bp +++ b/drivers/gpu/arm/midgard/build.bp @@ -19,18 +19,12 @@ bob_defaults { no_mali: { kbuild_options: ["CONFIG_MALI_NO_MALI=y"], }, - mali_corestack: { - kbuild_options: ["CONFIG_MALI_CORESTACK=y"], - }, mali_devfreq: { kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], }, mali_midgard_dvfs: { kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"], }, - mali_trace_timeline: { - kbuild_options: ["CONFIG_MALI_TRACE_TIMELINE=y"], - }, mali_debug: { kbuild_options: ["CONFIG_MALI_DEBUG=y"], }, @@ -40,15 +34,25 @@ bob_defaults { cinstr_job_dump: { kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"], }, + cinstr_vector_dump: { + kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], + }, + cinstr_gwt: { + kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], + }, mali_gator_support: { kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], }, mali_system_trace: { kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], }, + mali_pwrsoft_765: { + kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], + }, kbuild_options: [ "MALI_UNIT_TEST={{.unit_test_code}}", "MALI_CUSTOMER_RELEASE={{.release}}", + "MALI_USE_CSF={{.gpu_has_csf}}", "MALI_KERNEL_TEST_API={{.debug}}", ], defaults: ["kernel_defaults"], @@ -77,25 +81,33 @@ bob_kernel_module { "CONFIG_MALI_MIDGARD=m", "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", - "MALI_KERNEL_TEST_API={{.unit_test_code}}", - "MALI_MOCK_TEST={{.mali_mock_test}}", ], + mali_fpga_bus_logger: { + extra_symbols: [ + "bus_logger", + ], + }, + mali_corestack: { + kbuild_options: ["CONFIG_MALI_CORESTACK=y"], + }, mali_error_inject: { kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], }, mali_error_inject_random: { kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], }, - mali_prfcnt_set_secondary: { + cinstr_secondary_hwc: { kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], }, mali_2mb_alloc: { kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], }, - mali_mock_test: { - srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"], + gpu_has_csf: { + srcs: [ + "csf/*.c", + "csf/*.h", + "csf/Kbuild", + ], }, defaults: ["mali_kbase_shared_config_defaults"], } - -optional_subdirs = ["tests"] diff --git a/drivers/gpu/arm/midgard/ipa/Kbuild b/drivers/gpu/arm/midgard/ipa/Kbuild index 297d7f90ae7f..3d9cf8006b80 100755 --- a/drivers/gpu/arm/midgard/ipa/Kbuild +++ b/drivers/gpu/arm/midgard/ipa/Kbuild @@ -21,12 +21,8 @@ mali_kbase-y += \ ipa/mali_kbase_ipa_simple.o \ - ipa/mali_kbase_ipa.o - -mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o - -ifneq ($(wildcard $(src)/ipa/mali_kbase_ipa_vinstr_common.c),) - mali_kbase-y += \ + ipa/mali_kbase_ipa.o \ ipa/mali_kbase_ipa_vinstr_g7x.o \ ipa/mali_kbase_ipa_vinstr_common.o -endif + +mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o \ No newline at end of file diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c index 254c1a8b602f..9da2878e1417 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c @@ -38,15 +38,15 @@ #endif #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" -#define 
KBASE_IPA_G71_MODEL_NAME "mali-g71-power-model" -#define KBASE_IPA_G72_MODEL_NAME "mali-g72-power-model" -#define KBASE_IPA_TNOX_MODEL_NAME "mali-tnox-power-model" -static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { +static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { &kbase_simple_ipa_model_ops, &kbase_g71_ipa_model_ops, &kbase_g72_ipa_model_ops, - &kbase_tnox_ipa_model_ops + &kbase_g76_ipa_model_ops, + &kbase_g52_ipa_model_ops, + &kbase_g52_r1_ipa_model_ops, + &kbase_g51_ipa_model_ops }; int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) @@ -67,13 +67,13 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) return err; } -static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, const char *name) { int i; for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { - struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; + const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; if (!strcmp(ops->name, name)) return ops; @@ -83,16 +83,7 @@ static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device return NULL; } - -void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev) -{ - atomic_set(&kbdev->ipa_use_configured_model, false); -} - -void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev) -{ - atomic_set(&kbdev->ipa_use_configured_model, true); -} +KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); const char *kbase_ipa_model_name_from_id(u32 gpu_id) { @@ -102,16 +93,20 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id) if (GPU_ID_IS_NEW_FORMAT(prod_id)) { switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { case GPU_ID2_PRODUCT_TMIX: - return KBASE_IPA_G71_MODEL_NAME; + return "mali-g71-power-model"; case GPU_ID2_PRODUCT_THEX: - return KBASE_IPA_G72_MODEL_NAME; + return "mali-g72-power-model"; case GPU_ID2_PRODUCT_TNOX: - return KBASE_IPA_TNOX_MODEL_NAME; + return "mali-g76-power-model"; + case GPU_ID2_PRODUCT_TSIX: + return "mali-g51-power-model"; case GPU_ID2_PRODUCT_TGOX: if ((gpu_id & GPU_ID2_VERSION_MAJOR) == (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) - /* TGOX r0 shares a power model with TNOX */ - return KBASE_IPA_TNOX_MODEL_NAME; + /* g52 aliased to g76 power-model's ops */ + return "mali-g52-power-model"; + else + return "mali-g52_r1-power-model"; default: return KBASE_IPA_FALLBACK_MODEL_NAME; } @@ -119,6 +114,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id) return KBASE_IPA_FALLBACK_MODEL_NAME; } +KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); static struct device_node *get_model_dt_node(struct kbase_ipa_model *model) { @@ -251,7 +247,7 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model) KBASE_EXPORT_TEST_API(kbase_ipa_term_model); struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - struct kbase_ipa_model_ops *ops) + const struct kbase_ipa_model_ops *ops) { struct kbase_ipa_model *model; int err; @@ -305,7 +301,7 @@ int kbase_ipa_init(struct kbase_device *kbdev) { const char *model_name; - struct kbase_ipa_model_ops *ops; + const struct kbase_ipa_model_ops *ops; struct kbase_ipa_model *default_model = NULL; int err; @@ -359,8 +355,6 @@ int kbase_ipa_init(struct kbase_device *kbdev) kbdev->ipa.configured_model = default_model; } - kbase_ipa_model_use_configured_locked(kbdev); - end: if (err) kbase_ipa_term_locked(kbdev); @@ -380,6 +374,8 @@ void kbase_ipa_term(struct kbase_device *kbdev) 
mutex_lock(&kbdev->ipa.lock); kbase_ipa_term_locked(kbdev); mutex_unlock(&kbdev->ipa.lock); + + mutex_destroy(&kbdev->ipa.lock); } KBASE_EXPORT_TEST_API(kbase_ipa_term); @@ -449,14 +445,41 @@ u32 kbase_scale_static_power(const u32 c, const u32 voltage) return div_u64(v3c_big, 1000000); } +void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Record the event of GPU entering protected mode. */ + kbdev->ipa_protection_mode_switched = true; +} + static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) { + struct kbase_ipa_model *model; + unsigned long flags; + lockdep_assert_held(&kbdev->ipa.lock); - if (atomic_read(&kbdev->ipa_use_configured_model)) - return kbdev->ipa.configured_model; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->ipa_protection_mode_switched || + kbdev->ipa.force_fallback_model) + model = kbdev->ipa.fallback_model; else - return kbdev->ipa.fallback_model; + model = kbdev->ipa.configured_model; + + /* + * Now that it has been noted whether the GPU entered protected + * mode earlier, the event can be reset (if the GPU is not + * currently in protected mode) so that the configured model is used + * for the next sample. + */ + if (!kbdev->protected_mode) + kbdev->ipa_protection_mode_switched = false; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return model; } static u32 get_static_power_locked(struct kbase_device *kbdev, @@ -499,6 +522,9 @@ static unsigned long kbase_get_static_power(unsigned long voltage) struct kbase_device *kbdev = kbase_find_device(-1); #endif + if (!kbdev) + return 0ul; + mutex_lock(&kbdev->ipa.lock); model = get_current_model(kbdev); @@ -534,6 +560,9 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, struct kbase_device *kbdev = kbase_find_device(-1); #endif + if (!kbdev) + return 0ul; + mutex_lock(&kbdev->ipa.lock); model = kbdev->ipa.fallback_model; @@ -609,6 +638,9 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, int ret; struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + if (!kbdev) + return -ENODEV; + mutex_lock(&kbdev->ipa.lock); ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); mutex_unlock(&kbdev->ipa.lock); diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h index e215c2c89e16..746204813262 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h @@ -40,7 +40,7 @@ struct devfreq; struct kbase_ipa_model { struct kbase_device *kbdev; void *model_data; - struct kbase_ipa_model_ops *ops; + const struct kbase_ipa_model_ops *ops; struct list_head params; bool missing_dt_node_warning; }; @@ -153,9 +153,28 @@ void kbase_ipa_term(struct kbase_device *kbdev); */ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); +/** + * kbase_ipa_model_ops_find - Lookup an IPA model using its name + * @kbdev: pointer to kbase device + * @name: name of model to lookup + * + * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed. + */ +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, + const char *name); + +/** + * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID + * @gpu_id: GPU ID of GPU the model will be used for + * + * Return: The name of the appropriate counter-based model, or the name of the + * fallback model if no counter model exists.
+ */ +const char *kbase_ipa_model_name_from_id(u32 gpu_id); + /** * kbase_ipa_init_model - Initialize the particular IPA model - * @kbdev: pointer to the IPA model object, already initialized + * @kbdev: pointer to kbase device * @ops: pointer to object containing model specific methods. * * Initialize the model corresponding to the @ops pointer passed. @@ -164,7 +183,7 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); * Return: pointer to kbase_ipa_model on success, NULL on error */ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - struct kbase_ipa_model_ops *ops); + const struct kbase_ipa_model_ops *ops); /** * kbase_ipa_term_model - Terminate the particular IPA model * @model: pointer to the IPA model object, already initialized @@ -174,17 +193,22 @@ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, */ void kbase_ipa_term_model(struct kbase_ipa_model *model); -/* Switch to the fallback model */ -void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev); - -/* Switch to the model retrieved from device tree */ -void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev); +/** + * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into + * protected mode + * @kbdev: pointer to kbase device + * + * Makes IPA aware of the GPU switching to protected mode. + */ +void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); -extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; -extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; -extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; -#if MALI_UNIT_TEST /** * kbase_get_real_power() - get the real power consumption of the GPU * @df: dynamic voltage and frequency scaling information for the GPU. @@ -192,8 +216,7 @@ extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops; * @freq: a frequency, in HZ. * @voltage: a voltage, in mV. * - * This function is only exposed for use by unit tests. The returned value - * incorporates both static and dynamic power consumption. + * The returned value incorporates both static and dynamic power consumption. * * Return: 0 on success, or an error code. */ @@ -201,8 +224,10 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, unsigned long freq, unsigned long voltage); +#if MALI_UNIT_TEST /* Called by kbase_get_real_power() to invoke the power models. * Must be called with kbdev->ipa.lock held. + * This function is only exposed for use by unit tests.
*/ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, unsigned long freq, @@ -217,10 +242,7 @@ extern struct devfreq_cooling_power kbase_ipa_power_model_ops; #else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ -static inline void kbase_ipa_model_use_fallback_locked(struct kbase_device *kbdev) -{ } - -static inline void kbase_ipa_model_use_configured_locked(struct kbase_device *kbdev) +static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) { } #endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c index 029023c6036b..6e8c23cb7163 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -188,6 +188,50 @@ void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) } } +static int force_fallback_model_get(void *data, u64 *val) +{ + struct kbase_device *kbdev = data; + + mutex_lock(&kbdev->ipa.lock); + *val = kbdev->ipa.force_fallback_model; + mutex_unlock(&kbdev->ipa.lock); + + return 0; +} + +static int force_fallback_model_set(void *data, u64 val) +{ + struct kbase_device *kbdev = data; + + mutex_lock(&kbdev->ipa.lock); + kbdev->ipa.force_fallback_model = (val ? true : false); + mutex_unlock(&kbdev->ipa.lock); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, + force_fallback_model_get, + force_fallback_model_set, + "%llu\n"); + +static int current_power_get(void *data, u64 *val) +{ + struct kbase_device *kbdev = data; + struct devfreq *df = kbdev->devfreq; + u32 power; + + kbase_pm_context_active(kbdev); + kbase_get_real_power(df, &power, + kbdev->current_nominal_freq, (kbdev->current_voltage / 1000)); + kbase_pm_context_idle(kbdev); + + *val = power; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n"); + static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) { struct list_head *it; @@ -264,5 +308,10 @@ void kbase_ipa_debugfs_init(struct kbase_device *kbdev) kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); + debugfs_create_file("ipa_current_power", 0444, + kbdev->mali_debugfs_directory, kbdev, ¤t_power); + debugfs_create_file("ipa_force_fallback_model", 0644, + kbdev->mali_debugfs_directory, kbdev, &force_fallback_model); + mutex_unlock(&kbdev->ipa.lock); } diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c index e684df4a6662..c8399ab22910 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c @@ -268,8 +268,9 @@ static int kbase_simple_power_model_init(struct kbase_ipa_model *model) (void *) model_data, "mali-simple-power-model-temp-poll"); if (IS_ERR(model_data->poll_temperature_thread)) { + err = PTR_ERR(model_data->poll_temperature_thread); kfree(model_data); - return PTR_ERR(model_data->poll_temperature_thread); + return err; } err = add_params(model); diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c 
b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c index 4019657f41a3..1a6ba0152eb0 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c @@ -44,7 +44,7 @@ static inline u32 kbase_ipa_read_hwcnt( struct kbase_ipa_model_vinstr_data *model_data, u32 offset) { - u8 *p = model_data->vinstr_buffer; + u8 *p = (u8 *)model_data->dump_buf.dump_buf; return *(u32 *)&p[offset]; } @@ -83,126 +83,104 @@ s64 kbase_ipa_sum_all_shader_cores( return ret * coeff; } -s64 kbase_ipa_single_counter( +s64 kbase_ipa_sum_all_memsys_blocks( struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, u32 counter) -{ - /* Range: 0 < counter_value < 2^27 */ - const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); - - /* Range: -2^49 < ret < 2^49 */ - return counter_value * (s64) coeff; -} - -/** - * kbase_ipa_gpu_active - Inform IPA that GPU is now active - * @model_data: Pointer to model data - * - * This function may cause vinstr to become active. - */ -static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data) { struct kbase_device *kbdev = model_data->kbdev; + const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices; + u32 base = 0; + s64 ret = 0; + u32 i; - lockdep_assert_held(&kbdev->pm.lock); + for (i = 0; i < num_blocks; i++) { + /* 0 < counter_value < 2^27 */ + u32 counter_value = kbase_ipa_read_hwcnt(model_data, + base + counter); - if (!kbdev->ipa.vinstr_active) { - kbdev->ipa.vinstr_active = true; - kbase_vinstr_resume_client(model_data->vinstr_cli); + /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ + ret = kbase_ipa_add_saturate(ret, counter_value); + base += KBASE_IPA_NR_BYTES_PER_BLOCK; } + + /* Range: -2^51 < ret * coeff < 2^51 */ + return ret * coeff; } -/** - * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle - * @model_data: Pointer to model data - * - * This function may cause vinstr to become idle. 
- */ -static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data) +s64 kbase_ipa_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) { - struct kbase_device *kbdev = model_data->kbdev; - - lockdep_assert_held(&kbdev->pm.lock); + /* Range: 0 < counter_value < 2^27 */ + const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); - if (kbdev->ipa.vinstr_active) { - kbase_vinstr_suspend_client(model_data->vinstr_cli); - kbdev->ipa.vinstr_active = false; - } + /* Range: -2^49 < ret < 2^49 */ + return counter_value * (s64) coeff; } int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) { + int errcode; struct kbase_device *kbdev = model_data->kbdev; - struct kbase_ioctl_hwcnt_reader_setup setup; - size_t dump_size; + struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; + struct kbase_hwcnt_enable_map enable_map; + const struct kbase_hwcnt_metadata *metadata = + kbase_hwcnt_virtualizer_metadata(hvirt); - dump_size = kbase_vinstr_dump_size(kbdev); - model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); - if (!model_data->vinstr_buffer) { - dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); + if (!metadata) return -1; - } - setup.jm_bm = ~0u; - setup.shader_bm = ~0u; - setup.tiler_bm = ~0u; - setup.mmu_l2_bm = ~0u; - model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx, - &setup, model_data->vinstr_buffer); - if (!model_data->vinstr_cli) { - dev_err(kbdev->dev, "Failed to register IPA with vinstr core"); - kfree(model_data->vinstr_buffer); - model_data->vinstr_buffer = NULL; - return -1; + errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA enable map"); + return errcode; } - kbase_vinstr_hwc_clear(model_data->vinstr_cli); + kbase_hwcnt_enable_map_enable_all(&enable_map); - kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active; - kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle; - kbdev->ipa.model_data = model_data; - kbdev->ipa.vinstr_active = false; - /* Suspend vinstr, to ensure that the GPU is powered off until there is - * something to execute. 
- */ - kbase_vinstr_suspend_client(model_data->vinstr_cli); + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &enable_map, &model_data->hvirt_cli); + kbase_hwcnt_enable_map_free(&enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); + model_data->hvirt_cli = NULL; + return errcode; + } + + errcode = kbase_hwcnt_dump_buffer_alloc( + metadata, &model_data->dump_buf); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + model_data->hvirt_cli = NULL; + return errcode; + } return 0; } void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) { - struct kbase_device *kbdev = model_data->kbdev; - - kbdev->ipa.gpu_active_callback = NULL; - kbdev->ipa.gpu_idle_callback = NULL; - kbdev->ipa.model_data = NULL; - kbdev->ipa.vinstr_active = false; - - if (model_data->vinstr_cli) - kbase_vinstr_detach_client(model_data->vinstr_cli); - - model_data->vinstr_cli = NULL; - kfree(model_data->vinstr_buffer); - model_data->vinstr_buffer = NULL; + if (model_data->hvirt_cli) { + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); + model_data->hvirt_cli = NULL; + } } int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) { struct kbase_ipa_model_vinstr_data *model_data = (struct kbase_ipa_model_vinstr_data *)model->model_data; - struct kbase_device *kbdev = model_data->kbdev; s64 energy = 0; size_t i; u64 coeff = 0, coeff_mul = 0; + u64 start_ts_ns, end_ts_ns; u32 active_cycles; int err = 0; - if (!kbdev->ipa.vinstr_active) - goto err0; /* GPU powered off - no counters to collect */ - - err = kbase_vinstr_hwc_dump(model_data->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL); + err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, + &start_ts_ns, &end_ts_ns, &model_data->dump_buf); if (err) goto err0; @@ -256,12 +234,27 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) */ coeff = div_u64(coeff, active_cycles); - /* Scale by user-specified factor (where unity is 1000). - * Range: 0 <= coeff_mul < 2^61 + /* Not all models were derived at the same reference voltage. Voltage + * scaling is done by multiplying by V^2, so we need to *divide* by + * Vref^2 here. + * Range: 0 <= coeff <= 2^49 + */ + coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + /* Range: 0 <= coeff <= 2^52 */ + coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + + /* Scale by user-specified integer factor. + * Range: 0 <= coeff_mul < 2^57 */ coeff_mul = coeff * model_data->scaling_factor; - /* Range: 0 <= coeff_mul < 2^51 */ + /* The power models have results with units + * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this + * becomes fW/(Hz V^2), which are the units of coeff_mul. However, + * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide + * by 1000. 
+ * Range: 0 <= coeff_mul < 2^47 + */ coeff_mul = div_u64(coeff_mul, 1000u); err0: @@ -273,7 +266,8 @@ err0: int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, const struct kbase_ipa_group *ipa_groups_def, size_t ipa_group_size, - kbase_ipa_get_active_cycles_callback get_active_cycles) + kbase_ipa_get_active_cycles_callback get_active_cycles, + s32 reference_voltage) { int err = 0; size_t i; @@ -318,6 +312,13 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, if (err) goto exit; + model_data->reference_voltage = reference_voltage; + err = kbase_ipa_model_add_param_s32(model, "reference_voltage", + &model_data->reference_voltage, + 1, false); + if (err) + goto exit; + err = kbase_ipa_attach_vinstr(model_data); exit: diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h index c9288e8c01f7..46e3cd4bc6e1 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h @@ -24,6 +24,8 @@ #define _KBASE_IPA_VINSTR_COMMON_H_ #include "mali_kbase.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" /* Maximum number of IPA groups for an IPA model. */ #define KBASE_IPA_MAX_GROUP_DEF_NUM 16 @@ -49,12 +51,15 @@ typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinst * @groups_def_num: Number of elements in the array of IPA groups. * @get_active_cycles: Callback to return number of active cycles during * counter sample period - * @vinstr_cli: vinstr client handle - * @vinstr_buffer: buffer to dump hardware counters onto - * @scaling_factor: user-specified power scaling factor. This is - * interpreted as a fraction where the denominator is - * 1000. Range approx 0.0-32.0: - * 0 < scaling_factor < 2^15 + * @hvirt_cli: hardware counter virtualizer client handle + * @dump_buf: buffer to dump hardware counters onto + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. Range approx + * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 + * @scaling_factor: User-specified power scaling factor. This is an + * integer, which is multiplied by the power coefficient + * just before OPP scaling. + * Range approx 0-32: 0 < scaling_factor < 2^5 * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of * cycles the GPU was working) is less than * min_sample_cycles, the counter model will return an @@ -69,8 +74,9 @@ struct kbase_ipa_model_vinstr_data { const struct kbase_ipa_group *groups_def; size_t groups_def_num; kbase_ipa_get_active_cycles_callback get_active_cycles; - struct kbase_vinstr_client *vinstr_cli; - void *vinstr_buffer; + struct kbase_hwcnt_virtualizer_client *hvirt_cli; + struct kbase_hwcnt_dump_buffer dump_buf; + s32 reference_voltage; s32 scaling_factor; s32 min_sample_cycles; }; @@ -92,11 +98,12 @@ struct kbase_ipa_group { }; /** - * sum_all_shader_cores() - sum a counter over all cores - * @model_data pointer to model data - * @coeff model coefficient. Unity is ~2^20, so range approx - * +/- 4.0: -2^22 < coeff < 2^22 - * @counter offset in bytes of the counter used to calculate energy for IPA group + * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores + * @model_data: pointer to model data + * @coeff: model coefficient. 
Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter offset in bytes of the counter used to calculate energy + * for IPA group * * Calculate energy estimation based on hardware counter `counter' * across all shader cores. @@ -108,11 +115,29 @@ s64 kbase_ipa_sum_all_shader_cores( s32 coeff, u32 counter); /** - * sum_single_counter() - sum a single counter - * @model_data pointer to model data - * @coeff model coefficient. Unity is ~2^20, so range approx - * +/- 4.0: -2^22 < coeff < 2^22 - * @counter offset in bytes of the counter used to calculate energy for IPA group + * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter' across all + * memory system blocks. + * + * Return: Sum of counter values. Range: -2^51 < ret < 2^51 + */ +s64 kbase_ipa_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * kbase_ipa_single_counter() - sum a single counter + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group * * Calculate energy estimation based on hardware counter `counter'. * @@ -164,6 +189,8 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); * @ipa_group_size: number of elements in the array @ipa_groups_def * @get_active_cycles: callback to return the number of cycles the GPU was * active during the counter sample period. + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. * * This initialization function performs initialization steps common * for ipa models based on counter values. In each call, the model @@ -175,7 +202,8 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, const struct kbase_ipa_group *ipa_groups_def, size_t ipa_group_size, - kbase_ipa_get_active_cycles_callback get_active_cycles); + kbase_ipa_get_active_cycles_callback get_active_cycles, + s32 reference_voltage); /** * kbase_ipa_vinstr_common_model_term() - terminate ipa power model diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c index 7951b7475a18..6365d2f2d9f6 100755 --- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c +++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c @@ -30,8 +30,6 @@ #define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) #define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) #define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define SC0_BASE_ONE_MEMSYS (3 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define SC0_BASE_TWO_MEMSYS (4 * KBASE_IPA_NR_BYTES_PER_BLOCK) /* JM counter block offsets */ #define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) @@ -55,10 +53,6 @@ #define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) #define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) -/** Maximum number of cores for which a single Memory System block of performance counters is present. 
*/ -#define KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ((u8)4) - - /** * get_jm_counter() - get performance counter offset inside the Job Manager block * @model_data: pointer to GPU model data. @@ -98,9 +92,9 @@ static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinst static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, u32 counter_block_offset) { - const u32 sc_base = model_data->kbdev->gpu_props.num_cores <= KBASE_G7x_SINGLE_MEMSYS_MAX_NUM_CORES ? - SC0_BASE_ONE_MEMSYS : - SC0_BASE_TWO_MEMSYS; + const u32 sc_base = MEMSYS_BASE + + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * + KBASE_IPA_NR_BYTES_PER_BLOCK); return sc_base + counter_block_offset; } @@ -113,7 +107,7 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da * * Return: Energy estimation for a single Memory System performance counter. */ -static s64 kbase_g7x_memsys_single_counter( +static s64 kbase_g7x_sum_all_memsys_blocks( struct kbase_ipa_model_vinstr_data *model_data, s32 coeff, u32 offset) @@ -121,7 +115,7 @@ static s64 kbase_g7x_memsys_single_counter( u32 counter; counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset); - return kbase_ipa_single_counter(model_data, coeff, counter); + return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); } /** @@ -192,7 +186,7 @@ static const struct kbase_ipa_group ipa_groups_def_g71[] = { { .name = "l2_access", .default_value = 526300, - .op = kbase_g7x_memsys_single_counter, + .op = kbase_g7x_sum_all_memsys_blocks, .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, }, { @@ -225,7 +219,7 @@ static const struct kbase_ipa_group ipa_groups_def_g72[] = { { .name = "l2_access", .default_value = 393000, - .op = kbase_g7x_memsys_single_counter, + .op = kbase_g7x_sum_all_memsys_blocks, .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, }, { @@ -254,7 +248,7 @@ static const struct kbase_ipa_group ipa_groups_def_g72[] = { }, }; -static const struct kbase_ipa_group ipa_groups_def_tnox[] = { +static const struct kbase_ipa_group ipa_groups_def_g76[] = { { .name = "gpu_active", .default_value = 122000, @@ -282,12 +276,87 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = { { .name = "l2_access", .default_value = 378100, - .op = kbase_g7x_memsys_single_counter, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { + { + .name = "gpu_active", + .default_value = 224200, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 384700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 271900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 477700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 551400, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g51[] = { + { + .name = "gpu_active", + .default_value = 201400, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 392700, + .op = kbase_g7x_sum_all_shader_cores, + 
.counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 274000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 528000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 506400, + .op = kbase_g7x_sum_all_memsys_blocks, .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, }, }; -#define STANDARD_POWER_MODEL(gpu) \ +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_ ## init_token ## _power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + +#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ static int kbase_ ## gpu ## _power_model_init(\ struct kbase_ipa_model *model) \ { \ @@ -296,16 +365,19 @@ static const struct kbase_ipa_group ipa_groups_def_tnox[] = { return kbase_ipa_vinstr_common_model_init(model, \ ipa_groups_def_ ## gpu, \ ARRAY_SIZE(ipa_groups_def_ ## gpu), \ - kbase_g7x_get_active_cycles); \ + kbase_g7x_get_active_cycles, \ + (reference_voltage)); \ } \ - struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ - .name = "mali-" #gpu "-power-model", \ - .init = kbase_ ## gpu ## _power_model_init, \ - .term = kbase_ipa_vinstr_common_model_term, \ - .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ - }; \ - KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + IPA_POWER_MODEL_OPS(gpu, gpu) + +#define ALIAS_POWER_MODEL(gpu, as_gpu) \ + IPA_POWER_MODEL_OPS(gpu, as_gpu) + +STANDARD_POWER_MODEL(g71, 800); +STANDARD_POWER_MODEL(g72, 800); +STANDARD_POWER_MODEL(g76, 800); +STANDARD_POWER_MODEL(g52_r1, 1000); +STANDARD_POWER_MODEL(g51, 1000); -STANDARD_POWER_MODEL(g71); -STANDARD_POWER_MODEL(g72); -STANDARD_POWER_MODEL(tnox); +/* g52 is an alias of g76 (TNOX) for IPA */ +ALIAS_POWER_MODEL(g52, g76); diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 10da0c58e9eb..5571f846c23e 100755 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -59,6 +59,8 @@ enum base_hw_feature { BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -203,6 +205,7 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -230,6 +233,7 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -257,6 +261,7 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -284,6 +289,7 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_PROTECTED_MODE, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, + 
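For reference, the new macros expand roughly as follows for STANDARD_POWER_MODEL(g76, 800) (a hand expansion with whitespace added, not compiler output; the elided lines are the unchanged part of the init function body):

    static int kbase_g76_power_model_init(struct kbase_ipa_model *model)
    {
    	/* ... unchanged body ... */
    	return kbase_ipa_vinstr_common_model_init(model,
    			ipa_groups_def_g76,
    			ARRAY_SIZE(ipa_groups_def_g76),
    			kbase_g7x_get_active_cycles,
    			(800));
    }
    const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops = {
    	.name = "mali-g76-power-model",
    	.init = kbase_g76_power_model_init,
    	.term = kbase_ipa_vinstr_common_model_term,
    	.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
    };
    KBASE_EXPORT_TEST_API(kbase_g76_ipa_model_ops);

ALIAS_POWER_MODEL(g52, g76) emits only the IPA_POWER_MODEL_OPS part, so kbase_g52_ipa_model_ops reuses kbase_g76_power_model_init under the "mali-g52-power-model" name.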
BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -314,6 +320,7 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -344,6 +351,7 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -398,6 +406,94 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tNAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tBEx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tULx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; @@ -425,6 +521,63 @@ static const enum base_hw_feature base_hw_features_tBOx[] = { BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tIDx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tVAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END }; diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index bcd6c5ffbb25..d7c40ef9e9c7 100755 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -97,6 +97,7 @@ enum base_hw_issue { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T720_1386, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, @@ -111,6 +112,7 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_7940, BASE_HW_ISSUE_TMIX_8042, @@ -122,8 +124,11 @@ enum base_hw_issue { BASE_HW_ISSUE_TMIX_8456, GPUCORE_1619, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_TNOX_1194, + BASE_HW_ISSUE_TGOX_R1_1234, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; @@ -190,6 +195,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, @@ -231,6 +237,7 @@ static const enum base_hw_issue 
base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_TMIX_8438, @@ -268,6 +275,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -303,6 +311,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_TMIX_8438, @@ -327,6 +336,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -350,6 +360,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_TMIX_8438, @@ -528,6 +539,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3964, @@ -548,6 +560,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T720_1386, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -569,6 +582,7 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_11056, BASE_HW_ISSUE_T720_1386, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -835,6 +849,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_END @@ -855,6 +870,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_END @@ -870,6 +886,7 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_TMIX_8438, @@ -893,6 +910,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_END @@ -914,6 +932,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_END @@ -934,6 +953,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8438, BASE_HW_ISSUE_END @@ -948,6 +968,7 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, 
BASE_HW_ISSUE_T76X_3979, + BASE_HW_ISSUE_T83X_817, BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END @@ -967,6 +988,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -984,6 +1006,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1001,6 +1024,7 @@ static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { BASE_HW_ISSUE_TMIX_8463, BASE_HW_ISSUE_TMIX_8456, BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1015,6 +1039,7 @@ static const enum base_hw_issue base_hw_issues_model_tMIx[] = { BASE_HW_ISSUE_TMIX_8206, BASE_HW_ISSUE_TMIX_8343, BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1025,6 +1050,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1035,6 +1061,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1045,6 +1072,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1054,6 +1082,7 @@ static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1063,6 +1092,7 @@ static const enum base_hw_issue base_hw_issues_model_tHEx[] = { BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_TMIX_8042, BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1071,6 +1101,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1079,6 +1110,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1087,6 +1119,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { BASE_HW_ISSUE_11054, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1094,6 +1127,7 @@ static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1102,6 +1136,7 @@ static const enum base_hw_issue base_hw_issues_model_tSIx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1109,6 +1144,7 @@ static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1117,6 +1153,7 @@ static const enum base_hw_issue base_hw_issues_model_tDVx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1124,6 +1161,7 @@ static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, 
BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_END }; @@ -1133,6 +1171,7 @@ static const enum base_hw_issue base_hw_issues_model_tNOx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1140,6 +1179,7 @@ static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TNOX_1194, BASE_HW_ISSUE_END }; @@ -1148,6 +1188,8 @@ static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TGOX_R1_1234, BASE_HW_ISSUE_END }; @@ -1156,6 +1198,7 @@ static const enum base_hw_issue base_hw_issues_model_tGOx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1163,6 +1206,7 @@ static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1171,13 +1215,15 @@ static const enum base_hw_issue base_hw_issues_model_tKAx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; @@ -1185,7 +1231,59 @@ static const enum base_hw_issue base_hw_issues_model_tTRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tNAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tBEx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tULx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; @@ -1193,6 +1291,8 @@ static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; @@ -1201,6 +1301,42 @@ static const enum base_hw_issue base_hw_issues_model_tBOx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum 
base_hw_issue base_hw_issues_tIDx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tIDx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tVAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; @@ -1208,6 +1344,7 @@ static const enum base_hw_issue base_hw_issues_tEGx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; @@ -1216,6 +1353,7 @@ static const enum base_hw_issue base_hw_issues_model_tEGx[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TMIX_8133, BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_END }; diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index e53528e29fe0..70dc3c5d4917 100755 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -36,7 +36,6 @@ typedef struct base_mem_handle { } base_mem_handle; #include "mali_base_mem_priv.h" -#include "mali_kbase_profiling_gator_api.h" #include "mali_midg_coherency.h" #include "mali_kbase_gpu_id.h" @@ -127,18 +126,19 @@ typedef u32 base_mem_alloc_flags; */ #define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - /* BASE_MEM_HINT flags have been removed, but their values are reserved - * for backwards compatibility with older user-space drivers. The values - * can be re-used once support for r5p0 user-space drivers is removed, - * presumably in r7p0. - * - * RESERVED: (1U << 5) - * RESERVED: (1U << 6) - * RESERVED: (1U << 7) - * RESERVED: (1U << 8) - */ -#define BASE_MEM_RESERVED_BIT_5 ((base_mem_alloc_flags)1 << 5) -#define BASE_MEM_RESERVED_BIT_6 ((base_mem_alloc_flags)1 << 6) +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASE_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + #define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7) #define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) @@ -192,6 +192,7 @@ typedef u32 base_mem_alloc_flags; * Do not remove, use the next unreserved bit for new flags */ #define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) +#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19 /** * Memory starting from the end of the initial commit is aligned to 'extent' @@ -200,11 +201,20 @@ typedef u32 base_mem_alloc_flags; */ #define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode. 
+ * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. + * The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + /* Number of bits used as flags for base memory management * * Must be kept in sync with the base_mem_alloc_flags flags */ -#define BASE_MEM_FLAGS_NR_BITS 21 +#define BASE_MEM_FLAGS_NR_BITS 22 /* A mask for all output bits, excluding IN/OUT bits. */ @@ -226,9 +236,13 @@ typedef u32 base_mem_alloc_flags; /* A mask of all currently reserved flags */ #define BASE_MEM_FLAGS_RESERVED \ - (BASE_MEM_RESERVED_BIT_5 | BASE_MEM_RESERVED_BIT_6 | \ - BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \ - BASE_MEM_RESERVED_BIT_19) + (BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \ + BASE_MEM_MAYBE_RESERVED_BIT_19) + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASE_MEM_FLAGS_KERNEL_ONLY (BASE_MEM_PERMANENT_KERNEL_MAPPING) /* A mask of all the flags that can be returned via the base_mem_get_flags() * interface. @@ -236,7 +250,8 @@ typedef u32 base_mem_alloc_flags; #define BASE_MEM_FLAGS_QUERYABLE \ (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ - BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED)) + BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ + BASE_MEM_FLAGS_KERNEL_ONLY)) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -304,13 +319,15 @@ struct base_mem_import_user_buffer { #define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) #define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) #define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -/* reserved handles ..-64< for future special handles */ +/* reserved handles ..-48< for future special handles */ #define BASE_MEM_COOKIE_BASE (64ul << 12) #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ BASE_MEM_COOKIE_BASE) /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL +/* Mask to detect 4GB boundary (in page units) alignment */ +#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) /** * Limit on the 'extent' parameter for an allocation with the @@ -326,15 +343,9 @@ struct base_mem_import_user_buffer { /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ +/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ +#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ -/** - * @brief Result codes of changing the size of the backing store allocated to a tmem region - */ -typedef enum base_backing_threshold_status { - BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ - BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ - BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) 
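Taken together, the new flag bits imply two checks on anything arriving from user space: kernel-only flags must be rejected, and BASE_MEM_GPU_VA_SAME_4GB_PAGE cannot be combined with executable or TILER_ALIGN_TOP allocations. A hedged sketch of that validation (hypothetical helper; the return values mirror the ioctl paths later in this patch). Note also that with 4 KiB pages the new KBASE_MEM_ALLOC_MAX_SIZE works out to (8ull << 30) >> 12 = 2^21 pages.

    static int example_validate_user_flags(u64 flags)
    {
    	/* BASE_MEM_FLAGS_KERNEL_ONLY (PERMANENT_KERNEL_MAPPING) may not be
    	 * requested from user space. */
    	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
    		return -ENOMEM;

    	/* SAME_4GB_PAGE excludes executable and tiler-aligned allocations,
    	 * per the comment on the flag above. */
    	if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
    	    (flags & (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP)))
    		return -EINVAL;

    	return 0;
    }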
*/ -} base_backing_threshold_status; /** * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs @@ -643,9 +654,10 @@ typedef u32 base_jd_core_req; /** * SW only requirement: Just In Time allocation * - * This job requests a JIT allocation based on the request in the - * @base_jit_alloc_info structure which is passed via the jc element of - * the atom. + * This job requests a single or multiple JIT allocations through a list + * of @base_jit_alloc_info structure which is passed via the jc element of + * the atom. The number of @base_jit_alloc_info structures present in the + * list is passed via the nr_extres element of the atom * * It should be noted that the id entry in @base_jit_alloc_info must not * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. @@ -659,9 +671,9 @@ typedef u32 base_jd_core_req; /** * SW only requirement: Just In Time free * - * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC - * to be freed. The ID of the JIT allocation is passed via the jc element of - * the atom. + * This job requests a single or multiple JIT allocations created by + * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT + * allocations is passed via the jc element of the atom. * * The job will complete immediately. */ @@ -776,45 +788,6 @@ typedef u32 base_jd_core_req; ((core_req & BASE_JD_REQ_SOFT_JOB) || \ (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) -/** - * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which - * handles retaining cores for power management and affinity management. - * - * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack - * where lots of atoms could be submitted before powerup, and each has an - * affinity chosen that causes other atoms to have an affinity - * violation. Whilst the affinity was not causing violations at the time it - * was chosen, it could cause violations thereafter. For example, 1000 jobs - * could have had their affinity chosen during the powerup time, so any of - * those 1000 jobs could cause an affinity violation later on. - * - * The attack would otherwise occur because other atoms/contexts have to wait for: - * -# the currently running atoms (which are causing the violation) to - * finish - * -# and, the atoms that had their affinity chosen during powerup to - * finish. These are run preferentially because they don't cause a - * violation, but instead continue to cause the violation in others. - * -# or, the attacker is scheduled out (which might not happen for just 2 - * contexts) - * - * By re-choosing the affinity (which is designed to avoid violations at the - * time it's chosen), we break condition (2) of the wait, which minimizes the - * problem to just waiting for current jobs to finish (which can be bounded if - * the Job Scheduling Policy has a timer). - */ -enum kbase_atom_coreref_state { - /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */ - KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED, - /** Cores requested, but waiting for them to be powered. 
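The reworked JIT soft-jobs above take a list rather than a single descriptor. A sketch of what a user-space submission could look like (field names follow base_jd_atom_v2; the core_req member is not shown in this hunk and is assumed, and all unrelated atom setup is omitted):

    struct base_jit_alloc_info infos[2];	/* fill in id, va_pages, commit_pages, ... */
    struct base_jd_atom_v2 atom = { 0 };

    atom.jc = (u64)(uintptr_t)infos;	/* list of base_jit_alloc_info */
    atom.nr_extres = 2;			/* number of entries in the list */
    atom.core_req = BASE_JD_REQ_SOFT_JIT_ALLOC;

The matching BASE_JD_REQ_SOFT_JIT_FREE atom then passes the list of allocation IDs via jc in the same way.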
Requested cores given by kbase_jd_atom::affinity */ - KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES, - /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */ - KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY, - /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */ - KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS, - /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */ - KBASE_ATOM_COREREF_STATE_READY -}; - /* * Base Atom priority * @@ -822,15 +795,16 @@ enum kbase_atom_coreref_state { * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority * level that is not one of those defined below. * - * Priority levels only affect scheduling between atoms of the same type within - * a base context, and only after the atoms have had dependencies resolved. - * Fragment atoms does not affect non-frament atoms with lower priorities, and - * the other way around. For example, a low priority atom that has had its - * dependencies resolved might run before a higher priority atom that has not - * had its dependencies resolved. + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. * - * The scheduling between base contexts/processes and between atoms from - * different base contexts/processes is unaffected by atom priority. + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. * * The atoms are scheduled as follows with respect to their priorities: * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies @@ -842,6 +816,14 @@ enum kbase_atom_coreref_state { * - Any two atoms that have the same priority could run in any order with * respect to each other. That is, there is no ordering constraint between * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. 
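For context, the two js_ctx_scheduling_mode values described above map onto the named constants like this (values taken from the description, where 0 is the default system-wide mode; this is a restatement for illustration, not a new definition):

    enum {
    	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,	/* highest-priority atom runs first,
    						 * regardless of owning context */
    	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE = 1, /* round-robin between contexts;
    						   * priority only reorders atoms
    						   * within a context */
    };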
*/ typedef u8 base_jd_prio; @@ -889,7 +871,7 @@ typedef struct base_jd_atom_v2 { u64 jc; /**< job-chain GPU address */ struct base_jd_udata udata; /**< user data */ u64 extres_list; /**< list of external resources */ - u16 nr_extres; /**< nr of external resources */ + u16 nr_extres; /**< nr of external resources or JIT allocations */ u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, this is done in order to reduce possibility of improper assigment of a dependency field */ @@ -1642,20 +1624,21 @@ typedef u32 base_context_create_flags; #define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ ((base_context_create_flags)1 << 1) + /** * Bitpattern describing the ::base_context_create_flags that can be * passed to base_context_init() */ #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ - (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ - ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)) + (BASE_CONTEXT_CCTX_EMBEDDED | \ + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) /** * Bitpattern describing the ::base_context_create_flags that can be * passed to the kernel */ #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ - ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED /* * Private flags used on the base context @@ -1765,10 +1748,6 @@ typedef struct base_jd_replay_jc { /** @} end group base_api */ -typedef struct base_profiling_controls { - u32 profiling_controls[FBDUMP_CONTROL_MAX]; -} base_profiling_controls; - /* Enable additional tracepoints for latency measurements (TL_ATOM_READY, * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ #define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) @@ -1780,4 +1759,5 @@ typedef struct base_profiling_controls { #define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ BASE_TLSTREAM_JOB_DUMPING_ENABLED) + #endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h deleted file mode 100755 index 5e8add8838f2..000000000000 --- a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#ifndef _BASE_VENDOR_SPEC_FUNC_H_ -#define _BASE_VENDOR_SPEC_FUNC_H_ - -int kbase_get_vendor_specific_cpu_clock_speed(u32 * const); - -#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index cd711b816162..ca245b44b02f 100755 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -59,7 +59,6 @@ #include "mali_kbase_context.h" #include "mali_kbase_strings.h" #include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_trace_timeline.h" #include "mali_kbase_js.h" #include "mali_kbase_utility.h" #include "mali_kbase_mem.h" @@ -69,7 +68,7 @@ #include "mali_kbase_jd_debugfs.h" #include "mali_kbase_gpuprops.h" #include "mali_kbase_jm.h" -#include "mali_kbase_vinstr.h" +#include "mali_kbase_ioctl.h" #include "ipa/mali_kbase_ipa.h" @@ -77,6 +76,7 @@ #include #endif + #ifndef u64_to_user_ptr /* Introduced in Linux v4.6 */ #define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x) @@ -109,8 +109,6 @@ int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); struct kbase_device *kbase_find_device(int minor); void kbase_release_device(struct kbase_device *kbdev); -void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value); - /** * kbase_get_unmapped_area() - get an address range which is currently @@ -239,6 +237,70 @@ void kbase_event_close(struct kbase_context *kctx); void kbase_event_cleanup(struct kbase_context *kctx); void kbase_event_wakeup(struct kbase_context *kctx); +/** + * kbasep_jit_alloc_validate() - Validate the JIT allocation info. + * + * @kctx: Pointer to the kbase context within which the JIT + * allocation is to be validated. + * @info: Pointer to struct @base_jit_alloc_info + * which is to be validated. + * @return: 0 if jit allocation is valid; negative error code otherwise + */ +int kbasep_jit_alloc_validate(struct kbase_context *kctx, + struct base_jit_alloc_info *info); +/** + * kbase_free_user_buffer() - Free memory allocated for struct + * @kbase_debug_copy_buffer. + * + * @buffer: Pointer to the memory location allocated for the object + * of the type struct @kbase_debug_copy_buffer. + */ +static inline void kbase_free_user_buffer( + struct kbase_debug_copy_buffer *buffer) +{ + struct page **pages = buffer->extres_pages; + int nr_pages = buffer->nr_extres_pages; + + if (pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *pg = pages[i]; + + if (pg) + put_page(pg); + } + kfree(pages); + } +} + +/** + * kbase_mem_copy_from_extres_page() - Copy pages from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @extres_pages: Pointer to the pages which correspond to the external + * resources from which the copying will take place. + * @pages: Pointer to the pages to which the content is to be + * copied from the provided external resources. + * @nr_pages: Number of pages to copy. + * @target_page_nr: Number of target pages which will be used for copying. + * @offset: Offset into the target pages from which the copying + * is to be performed. + * @to_copy: Size of the chunk to be copied, in bytes. + */ +void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, + void *extres_page, struct page **pages, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset, size_t *to_copy); +/** + * kbase_mem_copy_from_extres() - Copy from external resources. 
+ * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + */ +int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data); int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); @@ -257,12 +319,6 @@ bool kbase_replay_process(struct kbase_jd_atom *katom); void kbasep_soft_job_timeout_worker(struct timer_list *timer); void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); -/* api used internally for register access. Contains validation and tracing */ -void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); -int kbase_device_trace_buffer_install( - struct kbase_context *kctx, u32 *tb, size_t size); -void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); - void kbasep_as_do_poke(struct work_struct *work); /** Returns the name associated with a Mali exception code @@ -292,6 +348,20 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) return kbdev->pm.suspending; } +/** + * kbase_pm_is_active - Determine whether the GPU is active + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This takes into account whether there is an active context reference. + * + * Return: true if the GPU is active, false otherwise + */ +static inline bool kbase_pm_is_active(struct kbase_device *kbdev) +{ + return kbdev->pm.active_count > 0; +} + /** * Return the atom's ID, as was originally supplied by userspace in * base_jd_atom_v2::atom_number diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c index f3e71d1a40d0..8d71926ea575 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c +++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c @@ -74,7 +74,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; - p = phys_to_page(as_phys_addr_t(page_array[page_index])); + p = as_page(page_array[page_index]); /* we need the first 10 words of the fragment shader job descriptor. 
* We need to check that the offset + 10 words is less that the page @@ -98,7 +98,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) /* The data needed overflows page the dimension, * need to map the subsequent page */ if (copy_size < JOB_HEADER_SIZE) { - p = phys_to_page(as_phys_addr_t(page_array[page_index + 1])); + p = as_page(page_array[page_index + 1]); page_2 = kmap_atomic(p); kbase_sync_single_for_cpu(katom->kctx->kbdev, @@ -181,7 +181,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) /* Flush CPU cache to update memory for future GPU reads*/ memcpy(page_1, dst, copy_size); - p = phys_to_page(as_phys_addr_t(page_array[page_index])); + p = as_page(page_array[page_index]); kbase_sync_single_for_device(katom->kctx->kbdev, kbase_dma_addr(p) + offset, @@ -190,8 +190,7 @@ int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) if (copy_size < JOB_HEADER_SIZE) { memcpy(page_2, dst + copy_size, JOB_HEADER_SIZE - copy_size); - p = phys_to_page(as_phys_addr_t(page_array[page_index + - 1])); + p = as_page(page_array[page_index + 1]); kbase_sync_single_for_device(katom->kctx->kbdev, kbase_dma_addr(p), diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c index 2e99a4d8ab1c..4cc93a95a456 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -48,7 +48,7 @@ static int kbase_as_fault_read(struct seq_file *sfile, void *data) /* output the last page fault addr */ seq_printf(sfile, "%llu\n", - (u64) kbdev->as[as_no].fault_addr); + (u64) kbdev->as[as_no].pf_data.addr); } } @@ -87,7 +87,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) kbdev->debugfs_as_read_bitmap = 0ULL; KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); - KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64)); + KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); debugfs_directory = debugfs_create_dir("address_spaces", kbdev->mali_debugfs_directory); diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c index 18444b8a9c63..27a03cf02138 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,12 @@ /* * The output flags should be a combination of the following values: - * KBASE_REG_CPU_CACHED: CPU cache should be enabled. + * KBASE_REG_CPU_CACHED: CPU cache should be enabled + * KBASE_REG_GPU_CACHED: GPU cache should be enabled + * + * NOTE: Some components within the GPU might only be able to access memory + * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for + * more details. 
*/ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) { @@ -38,6 +43,9 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) CSTD_UNUSED(nr_pages); + if (!(flags & BASE_MEM_UNCACHED_GPU)) + cache_flags |= KBASE_REG_GPU_CACHED; + if (flags & BASE_MEM_CACHED_CPU) cache_flags |= KBASE_REG_CPU_CACHED; diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index 376a94bb8b8a..bb2ab53f06f1 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -170,11 +170,6 @@ enum { */ #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ -/** - * Power Manager number of ticks before GPU is powered off - */ -#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */ - /** * Default scheduling tick granuality */ @@ -264,7 +259,7 @@ enum { * Note that as this prevents kbase from powering down shader cores, this limits * the available power policies to coarse_demand and always_on. */ -#define PLATFORM_POWER_DOWN_ONLY (1) +#define PLATFORM_POWER_DOWN_ONLY (0) /** * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 127d9bfa31c7..59609d7e5c5d 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -60,9 +60,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); #endif /* !defined(CONFIG_64BIT) */ -#ifdef CONFIG_MALI_TRACE_TIMELINE - kctx->timeline.owner_tgid = task_tgid_nr(current); -#endif atomic_set(&kctx->setup_complete, 0); atomic_set(&kctx->setup_in_progress, 0); spin_lock_init(&kctx->mm_update_lock); @@ -106,34 +103,24 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_jd; + atomic_set(&kctx->drain_pending, 0); mutex_init(&kctx->reg_lock); - mutex_init(&kctx->mem_partials_lock); + spin_lock_init(&kctx->mem_partials_lock); INIT_LIST_HEAD(&kctx->mem_partials); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); spin_lock_init(&kctx->waiting_soft_jobs_lock); err = kbase_dma_fence_init(kctx); if (err) - goto free_event; + goto free_kcpu_wq; - err = kbase_mmu_init(kctx); + err = kbase_mmu_init(kbdev, &kctx->mmu, kctx); if (err) goto term_dma_fence; - do { - err = kbase_mem_pool_grow(&kctx->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - if (err) - goto pgd_no_mem; - - mutex_lock(&kctx->mmu_lock); - kctx->pgd = kbase_mmu_alloc_pgd(kctx); - mutex_unlock(&kctx->mmu_lock); - } while (!kctx->pgd); - p = kbase_mem_alloc_page(&kctx->mem_pool); if (!p) goto no_sink_page; @@ -143,6 +130,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->cookies = KBASE_COOKIE_MASK; + /* Make sure page 0 is not used... 
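With the change to kbase_cache_enabled() above, GPU caching becomes the default and is only dropped when BASE_MEM_UNCACHED_GPU is requested. A small usage sketch (assuming cache_flags starts at zero in the part of the function not shown in the hunk):

    u32 cf;

    cf = kbase_cache_enabled(BASE_MEM_CACHED_CPU, 1);
    /* cf == KBASE_REG_GPU_CACHED | KBASE_REG_CPU_CACHED */

    cf = kbase_cache_enabled(BASE_MEM_UNCACHED_GPU, 1);
    /* cf == 0: neither the GPU nor the CPU mapping is cached */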
*/ err = kbase_region_tracker_init(kctx); if (err) @@ -158,13 +146,10 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); #endif -#ifdef CONFIG_MALI_TRACE_TIMELINE - atomic_set(&kctx->timeline.jd_atoms_in_flight, 0); -#endif kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; - mutex_init(&kctx->vinstr_cli_lock); + mutex_init(&kctx->legacy_hwcnt_lock); kbase_timer_setup(&kctx->soft_job_timeout, kbasep_soft_job_timeout_worker); @@ -180,15 +165,10 @@ no_sticky: no_region_tracker: kbase_mem_pool_free(&kctx->mem_pool, p, false); no_sink_page: - /* VM lock needed for the call to kbase_mmu_free_pgd */ - kbase_gpu_vm_lock(kctx); - kbase_mmu_free_pgd(kctx); - kbase_gpu_vm_unlock(kctx); -pgd_no_mem: - kbase_mmu_term(kctx); + kbase_mmu_term(kbdev, &kctx->mmu); term_dma_fence: kbase_dma_fence_term(kctx); -free_event: +free_kcpu_wq: kbase_event_cleanup(kctx); free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ @@ -207,9 +187,10 @@ out: } KBASE_EXPORT_SYMBOL(kbase_create_context); -static void kbase_reg_pending_dtor(struct kbase_va_region *reg) +static void kbase_reg_pending_dtor(struct kbase_device *kbdev, + struct kbase_va_region *reg) { - dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n"); + dev_dbg(kbdev->dev, "Freeing pending unmapped region\n"); kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); kfree(reg); @@ -239,15 +220,27 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_jd_zap_context(kctx); + /* We have already waited for the jobs to complete (and hereafter there + * can be no more submissions for the context). However the wait could + * have timedout and there could still be work items in flight that + * would do the completion processing of jobs. + * kbase_jd_exit() will destroy the 'job_done_wq'. And destroying the wq + * will cause it do drain and implicitly wait for those work items to + * complete. + */ + kbase_jd_exit(kctx); + #ifdef CONFIG_DEBUG_FS /* Removing the rest of the debugfs entries here as we want to keep the * atom debugfs interface alive until all atoms have completed. This * is useful for debugging hung contexts. */ debugfs_remove_recursive(kctx->kctx_dentry); + kbase_debug_job_fault_context_term(kctx); #endif kbase_event_cleanup(kctx); + /* * JIT must be terminated before the code below as it must be called * without the region lock being held. 
@@ -260,11 +253,8 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_sticky_resource_term(kctx); - /* MMU is disabled as part of scheduling out the context */ - kbase_mmu_free_pgd(kctx); - /* drop the aliasing sink page now that it can't be mapped anymore */ - p = phys_to_page(as_phys_addr_t(kctx->aliasing_sink_page)); + p = as_page(kctx->aliasing_sink_page); kbase_mem_pool_free(&kctx->mem_pool, p, false); /* free pending region setups */ @@ -274,7 +264,7 @@ void kbase_destroy_context(struct kbase_context *kctx) BUG_ON(!kctx->pending_regions[cookie]); - kbase_reg_pending_dtor(kctx->pending_regions[cookie]); + kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]); kctx->pending_regions[cookie] = NULL; pending_regions_to_clean &= ~(1UL << cookie); @@ -283,11 +273,10 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_region_tracker_term(kctx); kbase_gpu_vm_unlock(kctx); + /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); - kbase_jd_exit(kctx); - kbase_dma_fence_term(kctx); mutex_lock(&kbdev->mmu_hw_mutex); @@ -296,7 +285,7 @@ void kbase_destroy_context(struct kbase_context *kctx) spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_term(kctx); + kbase_mmu_term(kbdev, &kctx->mmu); pages = atomic_read(&kctx->used_pages); if (pages != 0) @@ -336,9 +325,6 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); - /* Latch the initial attributes into the Job Scheduler */ - kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); out: diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index d696b404f378..382285f060b5 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -48,9 +48,14 @@ #include #include #include +#include #include "mali_kbase_ioctl.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_vinstr.h" -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT #include "mali_kbase_gwt.h" #endif @@ -161,23 +166,25 @@ enum { #endif /* CONFIG_MALI_DEVFREQ */ inited_tlstream = (1u << 4), inited_backend_early = (1u << 5), - inited_backend_late = (1u << 6), - inited_device = (1u << 7), - inited_vinstr = (1u << 8), - - inited_job_fault = (1u << 10), - inited_sysfs_group = (1u << 11), - inited_misc_register = (1u << 12), - inited_get_device = (1u << 13), - inited_dev_list = (1u << 14), - inited_debugfs = (1u << 15), - inited_gpu_device = (1u << 16), - inited_registers_map = (1u << 17), - inited_io_history = (1u << 18), - inited_power_control = (1u << 19), - inited_buslogger = (1u << 20), - inited_protected = (1u << 21), - inited_ctx_sched = (1u << 22) + inited_hwcnt_gpu_iface = (1u << 6), + inited_hwcnt_gpu_ctx = (1u << 7), + inited_hwcnt_gpu_virt = (1u << 8), + inited_vinstr = (1u << 9), + inited_backend_late = (1u << 10), + inited_device = (1u << 11), + inited_job_fault = (1u << 13), + inited_sysfs_group = (1u << 14), + inited_misc_register = (1u << 15), + inited_get_device = (1u << 16), + inited_dev_list = (1u << 17), + inited_debugfs = (1u << 18), + inited_gpu_device = (1u << 19), + 
inited_registers_map = (1u << 20), + inited_io_history = (1u << 21), + inited_power_control = (1u << 22), + inited_buslogger = (1u << 23), + inited_protected = (1u << 24), + inited_ctx_sched = (1u << 25) }; static struct kbase_device *to_kbase_device(struct device *dev) @@ -282,9 +289,9 @@ EXPORT_SYMBOL(kbase_release_device); */ static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) { - char buf[32]; + char buf[4]; - count = min(sizeof(buf), count); + count = min(count, sizeof(buf) - 1); if (copy_from_user(buf, s, count)) return -EFAULT; @@ -407,6 +414,7 @@ static int kbase_open(struct inode *inode, struct file *filp) init_waitqueue_head(&kctx->event_queue); filp->private_data = kctx; + filp->f_mode |= FMODE_UNSIGNED_OFFSET; kctx->filp = filp; if (kbdev->infinite_cache_active_default) @@ -478,7 +486,6 @@ static int kbase_release(struct inode *inode, struct file *filp) #ifdef CONFIG_DEBUG_FS kbasep_mem_profile_debugfs_remove(kctx); - kbase_debug_job_fault_context_term(kctx); #endif mutex_lock(&kbdev->kctx_list_lock); @@ -495,17 +502,13 @@ static int kbase_release(struct inode *inode, struct file *filp) filp->private_data = NULL; - mutex_lock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); /* If this client was performing hwcnt dumping and did not explicitly - * detach itself, remove it from the vinstr core now */ - if (kctx->vinstr_cli) { - struct kbase_ioctl_hwcnt_enable enable; - - enable.dump_buffer = 0llu; - kbase_vinstr_legacy_hwc_setup( - kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable); - } - mutex_unlock(&kctx->vinstr_cli_lock); + * detach itself, destroy it now + */ + kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + mutex_unlock(&kctx->legacy_hwcnt_lock); kbase_destroy_context(kctx); @@ -579,12 +582,32 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, u64 flags = alloc->in.flags; u64 gpu_va; + rcu_read_lock(); + /* Don't allow memory allocation until user space has set up the + * tracking page (which sets kctx->process_mm). Also catches when we've + * forked. + */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + rcu_read_unlock(); + return -EINVAL; + } + rcu_read_unlock(); + + if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) + return -ENOMEM; + + /* Force SAME_VA if a 64-bit client. + * The only exception is GPU-executable memory if an EXEC_VA zone + * has been initialized. In that case, GPU-executable memory may + * or may not be SAME_VA. 
+ */ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { - /* force SAME_VA if a 64-bit client */ - flags |= BASE_MEM_SAME_VA; + if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx)) + flags |= BASE_MEM_SAME_VA; } + reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, alloc->in.extent, @@ -615,13 +638,7 @@ static int kbase_api_mem_free(struct kbase_context *kctx, static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_reader_setup *setup) { - int ret; - - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); - mutex_unlock(&kctx->vinstr_cli_lock); - - return ret; + return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); } static int kbase_api_hwcnt_enable(struct kbase_context *kctx, @@ -629,10 +646,31 @@ static int kbase_api_hwcnt_enable(struct kbase_context *kctx, { int ret; - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx, - &kctx->vinstr_cli, enable); - mutex_unlock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); + if (enable->dump_buffer != 0) { + /* Non-zero dump buffer, so user wants to create the client */ + if (kctx->legacy_hwcnt_cli == NULL) { + ret = kbase_hwcnt_legacy_client_create( + kctx->kbdev->hwcnt_gpu_virt, + enable, + &kctx->legacy_hwcnt_cli); + } else { + /* This context already has a client */ + ret = -EBUSY; + } + } else { + /* Zero dump buffer, so user wants to destroy the client */ + if (kctx->legacy_hwcnt_cli != NULL) { + kbase_hwcnt_legacy_client_destroy( + kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + ret = 0; + } else { + /* This context has no client to destroy */ + ret = -EINVAL; + } + } + mutex_unlock(&kctx->legacy_hwcnt_lock); return ret; } @@ -641,10 +679,9 @@ static int kbase_api_hwcnt_dump(struct kbase_context *kctx) { int ret; - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL); - mutex_unlock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); + ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli); + mutex_unlock(&kctx->legacy_hwcnt_lock); return ret; } @@ -653,9 +690,9 @@ static int kbase_api_hwcnt_clear(struct kbase_context *kctx) { int ret; - mutex_lock(&kctx->vinstr_cli_lock); - ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli); - mutex_unlock(&kctx->vinstr_cli_lock); + mutex_lock(&kctx->legacy_hwcnt_lock); + ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli); + mutex_unlock(&kctx->legacy_hwcnt_lock); return ret; } @@ -735,6 +772,12 @@ static int kbase_api_mem_jit_init(struct kbase_context *kctx, jit_init->max_allocations, jit_init->trim_level); } +static int kbase_api_mem_exec_init(struct kbase_context *kctx, + struct kbase_ioctl_mem_exec_init *exec_init) +{ + return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); +} + static int kbase_api_mem_sync(struct kbase_context *kctx, struct kbase_ioctl_mem_sync *sync) { @@ -822,6 +865,10 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, } flags = alias->in.flags; + if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) { + vfree(ai); + return -EINVAL; + } alias->out.gpu_va = kbase_mem_alias(kctx, &flags, alias->in.stride, alias->in.nents, @@ -843,6 +890,9 @@ static int kbase_api_mem_import(struct kbase_context *kctx, int ret; u64 flags = import->in.flags; + if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) + return -ENOMEM; + ret = kbase_mem_import(kctx, 
import->in.type, u64_to_user_ptr(import->in.phandle), @@ -859,6 +909,9 @@ static int kbase_api_mem_import(struct kbase_context *kctx, static int kbase_api_mem_flags_change(struct kbase_context *kctx, struct kbase_ioctl_mem_flags_change *change) { + if (change->flags & BASE_MEM_FLAGS_KERNEL_ONLY) + return -ENOMEM; + return kbase_mem_flags_change(kctx, change->gpu_va, change->flags, change->mask); } @@ -895,24 +948,6 @@ static int kbase_api_fence_validate(struct kbase_context *kctx, #endif } -static int kbase_api_get_profiling_controls(struct kbase_context *kctx, - struct kbase_ioctl_get_profiling_controls *controls) -{ - int ret; - - if (controls->count > (FBDUMP_CONTROL_MAX - FBDUMP_CONTROL_MIN)) - return -EINVAL; - - ret = copy_to_user(u64_to_user_ptr(controls->buffer), - &kctx->kbdev->kbase_profiling_controls[ - FBDUMP_CONTROL_MIN], - controls->count * sizeof(u32)); - - if (ret) - return -EFAULT; - return 0; -} - static int kbase_api_mem_profile_add(struct kbase_context *kctx, struct kbase_ioctl_mem_profile_add *data) { @@ -1039,6 +1074,7 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx, } #endif /* MALI_UNIT_TEST */ + #define KBASE_HANDLE_IOCTL(cmd, function) \ do { \ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ @@ -1162,6 +1198,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kbase_api_mem_jit_init, struct kbase_ioctl_mem_jit_init); break; + case KBASE_IOCTL_MEM_EXEC_INIT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, + kbase_api_mem_exec_init, + struct kbase_ioctl_mem_exec_init); + break; case KBASE_IOCTL_MEM_SYNC: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, kbase_api_mem_sync, @@ -1221,11 +1262,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kbase_api_fence_validate, struct kbase_ioctl_fence_validate); break; - case KBASE_IOCTL_GET_PROFILING_CONTROLS: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS, - kbase_api_get_profiling_controls, - struct kbase_ioctl_get_profiling_controls); - break; case KBASE_IOCTL_MEM_PROFILE_ADD: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, kbase_api_mem_profile_add, @@ -1273,7 +1309,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct kbase_ioctl_hwcnt_values); break; #endif -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT case KBASE_IOCTL_CINSTR_GWT_START: KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, kbase_gpu_gwt_start); @@ -1495,111 +1531,6 @@ static ssize_t set_policy(struct device *dev, struct device_attribute *attr, con */ static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); -/** - * show_ca_policy - Show callback for the core_availability_policy sysfs file. - * - * This function is called to get the contents of the core_availability_policy - * sysfs file. This is a list of the available policies with the currently - * active one surrounded by square brackets. - * - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. 
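
As an aside on the new KBASE_IOCTL_MEM_EXEC_INIT command handled above: it carries only the size of the EXEC_VA zone, which kbase_api_mem_exec_init() passes straight to kbase_region_tracker_init_exec(). A minimal user-space sketch follows; the helper is hypothetical, the struct layout and ioctl name are taken from the handler above, and the uapi header name and an already-open device fd are assumed.

	#include <sys/ioctl.h>
	#include "mali_kbase_ioctl.h"	/* assumed user-space copy of the kbase uapi header */

	/* Hypothetical helper: reserve an EXEC_VA zone of 'va_pages' 4KB pages
	 * before requesting GPU-executable (BASE_MEM_PROT_GPU_EX) allocations.
	 */
	static int example_init_exec_va(int mali_fd, __u64 va_pages)
	{
		struct kbase_ioctl_mem_exec_init exec_init = {
			.va_pages = va_pages,
		};

		return ioctl(mali_fd, KBASE_IOCTL_MEM_EXEC_INIT, &exec_init);
	}
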
- */ -static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - const struct kbase_pm_ca_policy *current_policy; - const struct kbase_pm_ca_policy *const *policy_list; - int policy_count; - int i; - ssize_t ret = 0; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - current_policy = kbase_pm_ca_get_policy(kbdev); - - policy_count = kbase_pm_ca_list_policies(&policy_list); - - for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { - if (policy_list[i] == current_policy) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); - else - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); - } - - if (ret < PAGE_SIZE - 1) { - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); - } else { - buf[PAGE_SIZE - 2] = '\n'; - buf[PAGE_SIZE - 1] = '\0'; - ret = PAGE_SIZE - 1; - } - - return ret; -} - -/** - * set_ca_policy - Store callback for the core_availability_policy sysfs file. - * - * This function is called when the core_availability_policy sysfs file is - * written to. It matches the requested policy against the available policies - * and if a matching policy is found calls kbase_pm_set_policy() to change - * the policy. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - const struct kbase_pm_ca_policy *new_policy = NULL; - const struct kbase_pm_ca_policy *const *policy_list; - int policy_count; - int i; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - policy_count = kbase_pm_ca_list_policies(&policy_list); - - for (i = 0; i < policy_count; i++) { - if (sysfs_streq(policy_list[i]->name, buf)) { - new_policy = policy_list[i]; - break; - } - } - - if (!new_policy) { - dev_err(dev, "core_availability_policy: policy not found\n"); - return -EINVAL; - } - - kbase_pm_ca_set_policy(kbdev, new_policy); - - return count; -} - -/* - * The sysfs file core_availability_policy - * - * This is used for obtaining information about the available policies, - * determining which policy is currently active, and changing the active - * policy. - */ -static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy); - /* * show_core_mask - Show callback for the core_mask sysfs file. * @@ -1653,7 +1584,10 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, { struct kbase_device *kbdev; u64 new_core_mask[3]; - int items; + int items, i; + ssize_t err = count; + unsigned long flags; + u64 shader_present, group0_core_mask; kbdev = to_kbase_device(dev); @@ -1664,50 +1598,59 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, &new_core_mask[0], &new_core_mask[1], &new_core_mask[2]); + if (items != 1 && items != 3) { + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format \n" + "or \n"); + err = -EINVAL; + goto end; + } + if (items == 1) new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; - if (items == 1 || items == 3) { - u64 shader_present = - kbdev->gpu_props.props.raw_props.shader_present; - u64 group0_core_mask = - kbdev->gpu_props.props.coherency_info.group[0]. 
- core_mask; - - if ((new_core_mask[0] & shader_present) != new_core_mask[0] || - !(new_core_mask[0] & group0_core_mask) || - (new_core_mask[1] & shader_present) != - new_core_mask[1] || - !(new_core_mask[1] & group0_core_mask) || - (new_core_mask[2] & shader_present) != - new_core_mask[2] || - !(new_core_mask[2] & group0_core_mask)) { - dev_err(dev, "power_policy: invalid core specification\n"); - return -EINVAL; - } - - if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != - new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != - new_core_mask[2]) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], - new_core_mask[1], new_core_mask[2]); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + shader_present = kbdev->gpu_props.props.raw_props.shader_present; + group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + + for (i = 0; i < 3; ++i) { + if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", + new_core_mask[i], i, shader_present); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", + new_core_mask[i], i, + kbdev->gpu_props.props.raw_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask[i] & group0_core_mask)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", + new_core_mask[i], i, group0_core_mask); + err = -EINVAL; + goto unlock; } + } - return count; + if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || + kbdev->pm.debug_core_mask[1] != + new_core_mask[1] || + kbdev->pm.debug_core_mask[2] != + new_core_mask[2]) { + + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], + new_core_mask[1], new_core_mask[2]); } - dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n" - "Use format \n" - "or \n"); - return -EINVAL; +unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +end: + return err; } /* @@ -2410,7 +2353,7 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G51" }, { .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TNOx" }, + .name = "Mali-G76" }, { .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G31" }, { .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, @@ -2541,9 +2484,11 @@ static ssize_t set_pm_poweroff(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; int items; - s64 gpu_poweroff_time; - int poweroff_shader_ticks, poweroff_gpu_ticks; + u64 gpu_poweroff_time; + unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; + unsigned long flags; kbdev = to_kbase_device(dev); if (!kbdev) @@ -2558,9 +2503,16 @@ static ssize_t set_pm_poweroff(struct device *dev, return -EINVAL; } - kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); - kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks; - kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks; + 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); + stt->configured_ticks = poweroff_shader_ticks; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (poweroff_gpu_ticks != 0) + dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n"); return count; } @@ -2580,16 +2532,22 @@ static ssize_t show_pm_poweroff(struct device *dev, struct device_attribute *attr, char * const buf) { struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; ssize_t ret; + unsigned long flags; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n", - ktime_to_ns(kbdev->pm.gpu_poweroff_time), - kbdev->pm.poweroff_shader_ticks, - kbdev->pm.poweroff_gpu_ticks); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", + ktime_to_ns(stt->configured_interval), + stt->configured_ticks); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return ret; } @@ -3061,6 +3019,45 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { #endif /* CONFIG_DEBUG_FS */ +static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + protected_mode_hwcnt_disable_work); + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + + if (do_disable) { + /* Protected mode state did not change while we were doing the + * disable, so commit the work we just performed and continue + * the state machine. + */ + kbdev->protected_mode_hwcnt_disabled = true; + kbase_backend_slot_update(kbdev); + } else { + /* Protected mode state was updated while we were doing the + * disable, so we need to undo the disable we just performed. 
+ */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + static int kbasep_protected_mode_init(struct kbase_device *kbdev) { #ifdef CONFIG_OF @@ -3078,6 +3075,10 @@ static int kbasep_protected_mode_init(struct kbase_device *kbdev) kbdev->protected_dev->data = kbdev; kbdev->protected_ops = &kbase_native_protected_ops; kbdev->protected_mode_support = true; + INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, + kbasep_protected_mode_hwcnt_disable_worker); + kbdev->protected_mode_hwcnt_desired = true; + kbdev->protected_mode_hwcnt_disabled = false; return 0; } @@ -3127,8 +3128,10 @@ static int kbasep_protected_mode_init(struct kbase_device *kbdev) static void kbasep_protected_mode_term(struct kbase_device *kbdev) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); kfree(kbdev->protected_dev); + } } #ifdef CONFIG_MALI_NO_MALI @@ -3195,6 +3198,7 @@ static int registers_map(struct kbase_device * const kbdev) kbdev->reg_start = reg_res->start; kbdev->reg_size = resource_size(reg_res); + err = kbase_common_reg_map(kbdev); if (err) { dev_err(kbdev->dev, "Failed to map registers\n"); @@ -3310,9 +3314,9 @@ static void power_control_term(struct kbase_device *kbdev) #endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ } +#ifdef MALI_KBASE_BUILD #ifdef CONFIG_DEBUG_FS -#if KBASE_GPU_RESET_EN #include static void trigger_quirks_reload(struct kbase_device *kbdev) @@ -3348,7 +3352,6 @@ MAKE_QUIRK_ACCESSORS(tiler); MAKE_QUIRK_ACCESSORS(mmu); MAKE_QUIRK_ACCESSORS(jm); -#endif /* KBASE_GPU_RESET_EN */ /** * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read @@ -3370,7 +3373,7 @@ static ssize_t debugfs_protected_debug_mode_read(struct file *file, ssize_t ret_val; kbase_pm_context_active(kbdev); - gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL); + gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); kbase_pm_context_idle(kbdev); if (gpu_status & GPU_DBGEN) @@ -3429,7 +3432,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); kbase_as_fault_debugfs_init(kbdev); -#if KBASE_GPU_RESET_EN /* fops_* variables created by invocations of macro * MAKE_QUIRK_ACCESSORS() above. 
*/ debugfs_create_file("quirks_sc", 0644, @@ -3444,7 +3446,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("quirks_jm", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_jm_quirks); -#endif /* KBASE_GPU_RESET_EN */ debugfs_create_bool("infinite_cache", 0644, debugfs_ctx_defaults_directory, @@ -3464,10 +3465,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_trace_debugfs_init(kbdev); #endif /* KBASE_TRACE_ENABLE */ -#ifdef CONFIG_MALI_TRACE_TIMELINE - kbasep_trace_timeline_debugfs_init(kbdev); -#endif /* CONFIG_MALI_TRACE_TIMELINE */ - #ifdef CONFIG_MALI_DEVFREQ #ifdef CONFIG_DEVFREQ_THERMAL if (kbdev->inited_subsys & inited_devfreq) @@ -3501,6 +3498,7 @@ static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } #endif /* CONFIG_DEBUG_FS */ +#endif /* MALI_KBASE_BUILD */ static void kbase_device_coherency_init(struct kbase_device *kbdev, unsigned prod_id) @@ -3589,7 +3587,6 @@ static struct attribute *kbase_attrs[] = { &dev_attr_reset_timeout.attr, &dev_attr_js_scheduling_period.attr, &dev_attr_power_policy.attr, - &dev_attr_core_availability_policy.attr, &dev_attr_core_mask.attr, &dev_attr_mem_pool_size.attr, &dev_attr_mem_pool_max_size.attr, @@ -3643,10 +3640,12 @@ static int kbase_platform_device_remove(struct platform_device *pdev) kbdev->inited_subsys &= ~inited_get_device; } +#ifdef MALI_KBASE_BUILD if (kbdev->inited_subsys & inited_debugfs) { kbase_device_debugfs_term(kbdev); kbdev->inited_subsys &= ~inited_debugfs; } +#endif if (kbdev->inited_subsys & inited_job_fault) { kbase_debug_job_fault_dev_term(kbdev); @@ -3660,14 +3659,30 @@ static int kbase_platform_device_remove(struct platform_device *pdev) } #endif + + if (kbdev->inited_subsys & inited_backend_late) { + kbase_backend_late_term(kbdev); + kbdev->inited_subsys &= ~inited_backend_late; + } + if (kbdev->inited_subsys & inited_vinstr) { kbase_vinstr_term(kbdev->vinstr_ctx); kbdev->inited_subsys &= ~inited_vinstr; } - if (kbdev->inited_subsys & inited_backend_late) { - kbase_backend_late_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_late; + if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) { + kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); + kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt; + } + + if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) { + kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); + kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx; + } + + if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) { + kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); + kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface; } if (kbdev->inited_subsys & inited_tlstream) { @@ -3892,20 +3907,40 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_tlstream; - err = kbase_backend_late_init(kbdev); + /* Initialize the kctx list. This is used by vinstr. */ + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + + err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); if (err) { - dev_err(kbdev->dev, "Late backend initialization failed\n"); + dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n"); kbase_platform_device_remove(pdev); return err; } - kbdev->inited_subsys |= inited_backend_late; + kbdev->inited_subsys |= inited_hwcnt_gpu_iface; - /* Initialize the kctx list. This is used by vinstr. 
*/ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); + err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, + &kbdev->hwcnt_gpu_ctx); + if (err) { + dev_err(kbdev->dev, + "GPU hwcnt context initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_hwcnt_gpu_ctx; + + err = kbase_hwcnt_virtualizer_init( + kbdev->hwcnt_gpu_ctx, &kbdev->hwcnt_gpu_virt); + if (err) { + dev_err(kbdev->dev, + "GPU hwcnt virtualizer initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_hwcnt_gpu_virt; - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { + err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); + if (err) { dev_err(kbdev->dev, "Virtual instrumentation initialization failed\n"); kbase_platform_device_remove(pdev); @@ -3913,8 +3948,18 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->inited_subsys |= inited_vinstr; + err = kbase_backend_late_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Late backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_backend_late; + + + #ifdef CONFIG_MALI_DEVFREQ - /* Devfreq uses vinstr, so must be initialized after it. */ + /* Devfreq uses hardware counters, so must be initialized after it. */ err = kbase_devfreq_init(kbdev); if (!err) kbdev->inited_subsys |= inited_devfreq; @@ -3922,6 +3967,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) dev_err(kbdev->dev, "Continuing without devfreq\n"); #endif /* CONFIG_MALI_DEVFREQ */ +#ifdef MALI_KBASE_BUILD err = kbase_debug_job_fault_dev_init(kbdev); if (err) { dev_err(kbdev->dev, "Job fault debug initialization failed\n"); @@ -4000,6 +4046,7 @@ static int kbase_platform_device_probe(struct platform_device *pdev) "Probed as %s\n", dev_name(kbdev->mdev.this_device)); kbase_dev_nr++; +#endif /* MALI_KBASE_BUILD */ return err; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c index 85a6afdb4ef3..bda05602de5e 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c +++ b/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -121,7 +121,8 @@ int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) kctx->as_nr = free_as; kbdev->as_to_kctx[free_as] = kctx; - kbase_mmu_update(kctx); + kbase_mmu_update(kbdev, &kctx->mmu, + kctx->as_nr); } } else { atomic_dec(&kctx->refcount); @@ -193,7 +194,8 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) if (atomic_read(&kctx->refcount)) { WARN_ON(kctx->as_nr != i); - kbase_mmu_update(kctx); + kbase_mmu_update(kbdev, &kctx->mmu, + kctx->as_nr); } else { /* This context might have been assigned an * AS before, clear it. diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index d2c09d6658f2..88bb0d38d5a8 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,30 @@ static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) return ret; } +static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) +{ + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags); + list_for_each_entry(event, event_list, head) { + if (event->katom->kctx == kctx) { + list_del(&event->head); + spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); + + wake_up(&kctx->kbdev->job_fault_resume_wq); + flush_work(&event->job_fault_work); + + /* job_fault_event_list can only have a single atom for + * each context. + */ + return; + } + } + spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); +} + static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -71,7 +95,7 @@ static int kbase_job_fault_event_wait(struct kbase_device *kbdev, unsigned long flags; spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - if (list_empty(event_list)) { + while (list_empty(event_list)) { spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); if (wait_event_interruptible(kbdev->job_fault_wq, kbase_is_job_fault_event_pending(kbdev))) @@ -237,6 +261,9 @@ bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, return true; } + if (kbase_ctx_flag(kctx, KCTX_DYING)) + return false; + if (kctx->kbdev->job_fault_debug == true) { if (completion_code != BASE_JD_EVENT_DONE) { @@ -337,7 +364,7 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) * job done but we delayed it. Now we should clean cache * earlier. Then the GPU memory dump should be correct. */ - kbase_backend_cacheclean(kbdev, event->katom); + kbase_backend_cache_clean(kbdev, event->katom); } else return NULL; @@ -488,6 +515,13 @@ void kbase_debug_job_fault_context_term(struct kbase_context *kctx) vfree(kctx->reg_dump); } +void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) +{ + WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + + kbase_ctx_remove_pending_event(kctx); +} + #else /* CONFIG_DEBUG_FS */ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h index f5ab0a44c1d4..ef69627cdce8 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -64,6 +64,21 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx); */ void kbase_debug_job_fault_context_term(struct kbase_context *kctx); +/** + * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault + * dumping on context termination. + * + * This function is called during context termination to unblock the atom for + * which the job fault occurred and also the atoms following it. 
This is needed + * otherwise the wait for zero jobs could timeout (leading to an assertion + * failure, kernel panic in debug builds) in the pathological case where + * although the thread/daemon capturing the job fault events is running, + * but for some reasons has stopped consuming the events. + * + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); + /** * kbase_debug_job_fault_process - Process the failed job. * It will send a event and wake up the job fault waiting queue diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index 857fe9712ef9..8f46117ab9db 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -32,6 +32,10 @@ #ifdef CONFIG_DEBUG_FS +#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE) +#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) +#endif + struct debug_mem_mapping { struct list_head node; @@ -130,7 +134,7 @@ static int debug_mem_show(struct seq_file *m, void *v) if (!(map->flags & KBASE_REG_CPU_CACHED)) prot = pgprot_writecombine(prot); - page = phys_to_page(as_phys_addr_t(map->alloc->pages[data->offset])); + page = as_page(map->alloc->pages[data->offset]); mapping = vmap(&page, 1, VM_MAP, prot); if (!mapping) goto out; @@ -199,9 +203,12 @@ static int debug_mem_open(struct inode *i, struct file *file) struct debug_mem_data *mem_data; int ret; + if (get_file_rcu(kctx_file) == 0) + return -ENOENT; + ret = seq_open(file, &ops); if (ret) - return ret; + goto open_fail; mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); if (!mem_data) { @@ -213,8 +220,6 @@ static int debug_mem_open(struct inode *i, struct file *file) INIT_LIST_HEAD(&mem_data->mapping_list); - get_file(kctx_file); - kbase_gpu_vm_lock(kctx); ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); @@ -223,12 +228,6 @@ static int debug_mem_open(struct inode *i, struct file *file) goto out; } - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); if (0 != ret) { kbase_gpu_vm_unlock(kctx); @@ -252,10 +251,12 @@ out: list_del(&mapping->node); kfree(mapping); } - fput(kctx_file); kfree(mem_data); } seq_release(i, file); +open_fail: + fput(kctx_file); + return ret; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index 3df17ac1d6ad..a135742ee980 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -40,8 +40,10 @@ #include #include #include +#include #include + #include #include #include @@ -52,7 +54,6 @@ #include #endif - #if defined(CONFIG_SYNC) #include #else @@ -143,8 +144,6 @@ #define BASE_MAX_NR_AS 16 /* mmu */ -#define MIDGARD_MMU_VA_BITS 48 - #define MIDGARD_MMU_LEVEL(x) (x) #define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) @@ -171,6 +170,12 @@ /* Maximum force replay limit when randomization is enabled */ #define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16 +/* Maximum number of pages of memory that require a permanent mapping, per + * kbase_context + */ +#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \ + PAGE_SHIFT) + /** Atom has been previously soft-stoppped */ #define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) /** Atom has been previously retried to execute */ @@ -195,6 +200,8 @@ #define KBASE_KATOM_FLAG_PROTECTED (1<<11) /* Atom has been 
stored in runnable_tree */ #define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +/* Atom is waiting for L2 caches to power up in order to enter protected mode */ +#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK */ @@ -417,21 +424,26 @@ enum kbase_atom_gpu_rb_state { * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms * currently submitted to GPU and protected mode transition is * not already in progress. - * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into - * protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to + * become disabled before entry into protected mode. * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation * for the coherency change. L2 shall be powered down and GPU shall * come out of fully coherent mode before entering protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; Prepare coherency change and switch - * GPU to protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; + * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that + * coherency register contains correct value when GPU enters + * protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check + * that L2 is powered up and switch GPU to protected mode. */ enum kbase_atom_enter_protected_state { /** * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK. */ KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - KBASE_ATOM_ENTER_PROTECTED_VINSTR, + KBASE_ATOM_ENTER_PROTECTED_HWCNT, KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, KBASE_ATOM_ENTER_PROTECTED_FINISHED, }; @@ -497,12 +509,9 @@ struct kbase_ext_res { * external resources referenced by the atom. * @device_nr: indicates the coregroup with which the atom is associated, * when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. - * @affinity: bitmask of the shader cores on which the atom can execute. * @jc: GPU address of the job-chain. * @softjob_data: Copy of data read from the user space buffer that @jc * points to. - * @coreref_state: state of the atom with respect to retention of shader - * cores for affinity & power management. * @fence: Stores either an input or output sync fence, depending * on soft-job type * @sync_waiter: Pointer to the sync fence waiter structure passed to the @@ -593,10 +602,8 @@ struct kbase_jd_atom { struct kbase_ext_res *extres; u32 device_nr; - u64 affinity; u64 jc; void *softjob_data; - enum kbase_atom_coreref_state coreref_state; #if defined(CONFIG_SYNC) struct sync_fence *fence; struct sync_fence_waiter sync_waiter; @@ -691,7 +698,7 @@ struct kbase_jd_atom { enum kbase_atom_gpu_rb_state gpu_rb_state; - u64 need_cache_flush_cores_retained; + bool need_cache_flush_cores_retained; atomic_t blocked; @@ -724,6 +731,33 @@ struct kbase_jd_atom { u32 age; }; +/** + * struct kbase_debug_copy_buffer - information about the buffer to be copied. + * + * @size: size of the buffer in bytes + * @pages: pointer to an array of pointers to the pages which contain + * the buffer + * @is_vmalloc: true if @pages was allocated with vzalloc. 
false if @pages was + * allocated with kcalloc + * @nr_pages: number of pages + * @offset: offset into the pages + * @gpu_alloc: pointer to physical memory allocated by the GPU + * @extres_pages: array of pointers to the pages containing external resources + * for this buffer + * @nr_extres_pages: number of pages in @extres_pages + */ +struct kbase_debug_copy_buffer { + size_t size; + struct page **pages; + bool is_vmalloc; + int nr_pages; + size_t offset; + struct kbase_mem_phy_alloc *gpu_alloc; + + struct page **extres_pages; + int nr_extres_pages; +}; + static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) { return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); @@ -820,6 +854,21 @@ struct kbase_mmu_setup { u64 transcfg; }; +/** + * struct kbase_fault - object containing data relating to a page or bus fault. + * @addr: Records the faulting address. + * @extra_addr: Records the secondary fault address. + * @status: Records the fault status as reported by Hw. + * @protected_mode: Flag indicating whether the fault occurred in protected mode + * or not. + */ +struct kbase_fault { + u64 addr; + u64 extra_addr; + u32 status; + bool protected_mode; +}; + /** * struct kbase_as - object representing an address space of GPU. * @number: Index at which this address space structure is present @@ -831,11 +880,8 @@ struct kbase_mmu_setup { * @work_busfault: Work item for the Bus fault handling. * @fault_type: Type of fault which occured for this address space, * regular/unexpected Bus or Page fault. - * @protected_mode: Flag indicating whether the fault occurred in protected - * mode or not. - * @fault_status: Records the fault status as reported by Hw. - * @fault_addr: Records the faulting address. - * @fault_extra_addr: Records the secondary fault address. + * @pf_data: Data relating to page fault. + * @bf_data: Data relating to bus fault. * @current_setup: Stores the MMU configuration for this address space. * @poke_wq: Workqueue to process the work items queue for poking the * MMU as a WA for BASE_HW_ISSUE_8316. @@ -853,10 +899,8 @@ struct kbase_as { struct work_struct work_pagefault; struct work_struct work_busfault; enum kbase_mmu_fault_type fault_type; - bool protected_mode; - u32 fault_status; - u64 fault_addr; - u64 fault_extra_addr; + struct kbase_fault pf_data; + struct kbase_fault bf_data; struct kbase_mmu_setup current_setup; struct workqueue_struct *poke_wq; struct work_struct poke_work; @@ -865,6 +909,28 @@ struct kbase_as { struct hrtimer poke_timer; }; +/** + * struct kbase_mmu_table - object representing a set of GPU page tables + * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries + * of top & intermediate level page tables to avoid + * repeated calls to kmap_atomic during the MMU teardown. + * @mmu_lock: Lock to serialize the accesses made to multi level GPU + * page tables + * @pgd: Physical address of the page allocated for the top + * level page table of the context, this is used for + * MMU HW programming as the address translation will + * start from the top level page table. 
+ * @kctx: If this set of MMU tables belongs to a context then + * this is a back-reference to the context, otherwise + * it is NULL + */ +struct kbase_mmu_table { + u64 *mmu_teardown_pages; + struct mutex mmu_lock; + phys_addr_t pgd; + struct kbase_context *kctx; +}; + static inline int kbase_as_has_bus_fault(struct kbase_as *as) { return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS; @@ -945,88 +1011,6 @@ struct kbase_trace { u8 flags; }; -/** Event IDs for the power management framework. - * - * Any of these events might be missed, so they should not be relied upon to - * find the precise state of the GPU at a particular time in the - * trace. Overall, we should get a high percentage of these events for - * statisical purposes, and so a few missing should not be a problem */ -enum kbase_timeline_pm_event { - /* helper for tests */ - KBASEP_TIMELINE_PM_EVENT_FIRST, - - /** Event reserved for backwards compatibility with 'init' events */ - KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST, - - /** The power state of the device has changed. - * - * Specifically, the device has reached a desired or available state. - */ - KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED, - - /** The GPU is becoming active. - * - * This event is sent when the first context is about to use the GPU. - */ - KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE, - - /** The GPU is becoming idle. - * - * This event is sent when the last context has finished using the GPU. - */ - KBASE_TIMELINE_PM_EVENT_GPU_IDLE, - - /** Event reserved for backwards compatibility with 'policy_change' - * events */ - KBASE_TIMELINE_PM_EVENT_RESERVED_4, - - /** Event reserved for backwards compatibility with 'system_suspend' - * events */ - KBASE_TIMELINE_PM_EVENT_RESERVED_5, - - /** Event reserved for backwards compatibility with 'system_resume' - * events */ - KBASE_TIMELINE_PM_EVENT_RESERVED_6, - - /** The job scheduler is requesting to power up/down cores. - * - * This event is sent when: - * - powered down cores are needed to complete a job - * - powered up cores are not needed anymore - */ - KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, - - KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, -}; - -#ifdef CONFIG_MALI_TRACE_TIMELINE -struct kbase_trace_kctx_timeline { - atomic_t jd_atoms_in_flight; - u32 owner_tgid; -}; - -struct kbase_trace_kbdev_timeline { - /* Note: strictly speaking, not needed, because it's in sync with - * kbase_device::jm_slots[]::submitted_nr - * - * But it's kept as an example of how to add global timeline tracking - * information - * - * The caller must hold hwaccess_lock when accessing this */ - u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS]; - - /* Last UID for each PM event */ - atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1]; - /* Counter for generating PM event UIDs */ - atomic_t pm_event_uid_counter; - /* - * L2 transition state - true indicates that the transition is ongoing - * Expected to be protected by hwaccess_lock */ - bool l2_transitioning; -}; -#endif /* CONFIG_MALI_TRACE_TIMELINE */ - - struct kbasep_kctx_list_element { struct list_head link; struct kbase_context *kctx; @@ -1048,7 +1032,11 @@ struct kbase_pm_device_data { */ struct mutex lock; - /** The reference count of active contexts on this device. */ + /** + * The reference count of active contexts on this device. Note that + * some code paths keep shaders/the tiler powered whilst this is 0. Use + * kbase_pm_is_active() instead to check for such cases. 
+ */ int active_count; /** Flag indicating suspending/suspended */ bool suspending; @@ -1081,15 +1069,6 @@ struct kbase_pm_device_data { /* Time in milliseconds between each dvfs sample */ u32 dvfs_period; - /* Period of GPU poweroff timer */ - ktime_t gpu_poweroff_time; - - /* Number of ticks of GPU poweroff timer before shader is powered off */ - int poweroff_shader_ticks; - - /* Number of ticks of GPU poweroff timer before GPU is powered off */ - int poweroff_gpu_ticks; - struct kbase_pm_backend_data backend; }; @@ -1141,9 +1120,33 @@ struct kbase_devfreq_opp { u64 core_mask; }; +/* MMU mode flags */ +#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */ + +/** + * struct kbase_mmu_mode - object containing pointer to methods invoked for + * programming the MMU, as per the MMU mode supported + * by Hw. + * @update: enable & setup/configure one of the GPU address space. + * @get_as_setup: retrieve the configuration of one of the GPU address space. + * @disable_as: disable one of the GPU address space. + * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry. + * @ate_is_valid: check if the pte is a valid address translation entry + * encoding the physical address of the actual mapped page. + * @pte_is_valid: check if the pte is a valid entry encoding the physical + * address of the next lower level page table. + * @entry_set_ate: program the pte to be a valid address translation entry to + * encode the physical address of the actual page being mapped. + * @entry_set_pte: program the pte to be a valid entry to encode the physical + * address of the next lower level page table. + * @entry_invalidate: clear out or invalidate the pte. + * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. + */ struct kbase_mmu_mode { - void (*update)(struct kbase_context *kctx); - void (*get_as_setup)(struct kbase_context *kctx, + void (*update)(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr); + void (*get_as_setup)(struct kbase_mmu_table *mmut, struct kbase_mmu_setup * const setup); void (*disable_as)(struct kbase_device *kbdev, int as_nr); phys_addr_t (*pte_to_phy_addr)(u64 entry); @@ -1153,6 +1156,7 @@ struct kbase_mmu_mode { unsigned long flags, unsigned int level); void (*entry_set_pte)(u64 *entry, phys_addr_t phy); void (*entry_invalidate)(u64 *entry); + unsigned long flags; }; struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); @@ -1161,6 +1165,7 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); #define DEVNAME_SIZE 16 + /** * struct kbase_device - Object representing an instance of GPU platform device, * allocated from the probe method of mali driver. @@ -1227,7 +1232,7 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @mmu_mode: Pointer to the object containing methods for programming * the MMU, depending on the type of MMU supported by Hw. * @as: Array of objects representing address spaces of GPU. - * @as_free: Bitpattern of free/available address space lots + * @as_free: Bitpattern of free/available GPU address spaces. * @as_to_kctx: Array of pointers to struct kbase_context, having * GPU adrress spaces assigned to them. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask @@ -1236,48 +1241,19 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * configuration/properties of GPU HW device in use. * @hw_issues_mask: List of SW workarounds for HW issues * @hw_features_mask: List of available HW features. 
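
A note on the reworked struct kbase_mmu_mode above: every hook now takes the device and a struct kbase_mmu_table instead of a kbase_context. The sketch below is illustrative only; example_update_as() is hypothetical, and kbdev->mmu_mode is assumed to point at the ops table returned by kbase_mmu_mode_get_lpae() or kbase_mmu_mode_get_aarch64().

	static void example_update_as(struct kbase_device *kbdev,
				      struct kbase_context *kctx)
	{
		struct kbase_mmu_setup setup;

		/* Program the address space the scheduler assigned to kctx,
		 * mirroring kbase_mmu_update(kbdev, &kctx->mmu, kctx->as_nr)
		 * as called from kbase_ctx_sched_retain_ctx() above.
		 */
		kbdev->mmu_mode->update(kbdev, &kctx->mmu, kctx->as_nr);

		/* Or only compute the register setup values that would be
		 * written for this set of page tables.
		 */
		kbdev->mmu_mode->get_as_setup(&kctx->mmu, &setup);
	}
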
- * shader_inuse_bitmap: Bitmaps of shader cores that are currently in use. - * These should be kept up to date by the job scheduler. - * The bit to be set in this bitmap should already be set - * in the @shader_needed_bitmap. - * @pm.power_change_lock should be held when accessing - * these members. - * @shader_inuse_cnt: Usage count for each of the 64 shader cores - * @shader_needed_bitmap: Bitmaps of cores the JS needs for jobs ready to run - * kbase_pm_check_transitions_nolock() should be called - * when the bitmap is modified to update the power - * management system and allow transitions to occur. - * @shader_needed_cnt: Count for each of the 64 shader cores, incremented - * when the core is requested for use and decremented - * later when the core is known to be powered up for use. - * @tiler_inuse_cnt: Usage count for the Tiler block. @tiler_needed_cnt - * should be non zero at the time of incrementing the - * usage count. - * @tiler_needed_cnt: Count for the Tiler block shader cores, incremented - * when Tiler is requested for use and decremented - * later when Tiler is known to be powered up for use. * @disjoint_event: struct for keeping track of the disjoint information, * that whether the GPU is in a disjoint state and the * number of disjoint events that have occurred on GPU. - * @l2_users_count: Refcount for tracking users of the l2 cache, e.g. - * when using hardware counter instrumentation. - * @shader_available_bitmap: Bitmap of shader cores that are currently available, - * powered up and the power policy is happy for jobs - * to be submitted to these cores. These are updated - * by the power management code. The job scheduler - * should avoid submitting new jobs to any cores - * that are not marked as available. - * @tiler_available_bitmap: Bitmap of tiler units that are currently available. - * @l2_available_bitmap: Bitmap of the currently available Level 2 caches. - * @stack_available_bitmap: Bitmap of the currently available Core stacks. - * @shader_ready_bitmap: Bitmap of shader cores that are ready (powered on) - * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing - * power state. * @nr_hw_address_spaces: Number of address spaces actually available in the * GPU, remains constant after driver initialisation. * @nr_user_address_spaces: Number of address spaces available to user contexts * @hwcnt: Structure used for instrumentation and HW counters * dumping + * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. + * @hwcnt_gpu_ctx: Context for GPU hardware counter access. + * @hwaccess_lock must be held when calling + * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. + * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. * @vinstr_ctx: vinstr context created per device * @trace_lock: Lock to serialize the access to trace buffer. * @trace_first_out: Index/offset in the trace buffer at which the first @@ -1290,8 +1266,14 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to * complete for the GPU jobs before proceeding with the * GPU reset. - * @cacheclean_lock: Lock to serialize the clean & invalidation of GPU caches, - * between Job Manager backend & Instrumentation code. + * @cache_clean_in_progress: Set when a cache clean has been started, and + * cleared when it has finished. This prevents multiple + * cache cleans being done simultaneously. 
+ * @cache_clean_queued: Set if a cache clean is invoked while another is in + * progress. If this happens, another cache clean needs + * to be triggered immediately after completion of the + * current one. + * @cache_clean_wait: Signalled when a cache clean has finished. * @platform_context: Platform specific private data to be accessed by * platform specific config files only. * @kctx_list: List of kbase_contexts created for the device, including @@ -1324,8 +1306,10 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * GPU device. * @devfreq_cooling: Pointer returned on registering devfreq cooling device * corresponding to @devfreq. - * @ipa_use_configured_model: set to TRUE when configured model is used for IPA and - * FALSE when fallback model is used. + * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected + * mode. It is a sticky flag which is cleared by IPA + * once it has made use of information that GPU had + * previously entered protected mode. * @ipa: Top level structure for IPA, containing pointers to both * configured & fallback models. * @timeline: Stores the global timeline tracking information. @@ -1350,8 +1334,6 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs * file "read_register". - * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer - * dumping and s/w counter reporting. * @force_replay_limit: Number of gpu jobs, having replay atoms associated with them, * that are run before a job is forced to fail and replay. * Set to 0 to disable forced failures. @@ -1394,6 +1376,13 @@ struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); * @protected_mode: set to TRUE when GPU is put into protected mode * @protected_mode_transition: set to TRUE when GPU is transitioning into or * out of protected mode. + * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be + * enabled. Counters must be disabled before transition + * into protected mode. + * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not + * enabled. + * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware + * counters, used if atomic disable is not possible. * @protected_mode_support: set to true if protected mode is supported. 
* @buslogger: Pointer to the structure required for interfacing * with the bus logger module to set the size of buffer @@ -1460,7 +1449,6 @@ struct kbase_device { u16 as_free; /* Bitpattern of free Address Spaces */ struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; - spinlock_t mmu_mask_change; struct kbase_gpu_props gpu_props; @@ -1468,33 +1456,11 @@ struct kbase_device { unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - u64 shader_inuse_bitmap; - - u32 shader_inuse_cnt[64]; - - u64 shader_needed_bitmap; - - u32 shader_needed_cnt[64]; - - u32 tiler_inuse_cnt; - - u32 tiler_needed_cnt; - struct { atomic_t count; atomic_t state; } disjoint_event; - u32 l2_users_count; - - u64 shader_available_bitmap; - u64 tiler_available_bitmap; - u64 l2_available_bitmap; - u64 stack_available_bitmap; - - u64 shader_ready_bitmap; - u64 shader_transitioning_bitmap; - s8 nr_hw_address_spaces; s8 nr_user_address_spaces; @@ -1504,10 +1470,14 @@ struct kbase_device { struct kbase_context *kctx; u64 addr; + u64 addr_bytes; struct kbase_instr_backend backend; } hwcnt; + struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; + struct kbase_hwcnt_context *hwcnt_gpu_ctx; + struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; struct kbase_vinstr_context *vinstr_ctx; #if KBASE_TRACE_ENABLE @@ -1519,7 +1489,9 @@ struct kbase_device { u32 reset_timeout_ms; - struct mutex cacheclean_lock; + bool cache_clean_in_progress; + bool cache_clean_queued; + wait_queue_head_t cache_clean_wait; void *platform_context; @@ -1542,7 +1514,7 @@ struct kbase_device { #else struct thermal_cooling_device *devfreq_cooling; #endif - atomic_t ipa_use_configured_model; + bool ipa_protection_mode_switched; struct { /* Access to this struct must be with ipa.lock held */ struct mutex lock; @@ -1554,35 +1526,15 @@ struct kbase_device { * the difference between last_metrics and the current values. 
*/ struct kbasep_pm_metrics last_metrics; - - /* - * gpu_active_callback - Inform IPA that GPU is now active - * @model_data: Pointer to model data - */ - void (*gpu_active_callback)( - struct kbase_ipa_model_vinstr_data *model_data); - - /* - * gpu_idle_callback - Inform IPA that GPU is now idle - * @model_data: Pointer to model data - */ - void (*gpu_idle_callback)( - struct kbase_ipa_model_vinstr_data *model_data); - /* Model data to pass to ipa_gpu_active/idle() */ struct kbase_ipa_model_vinstr_data *model_data; - /* true if IPA is currently using vinstr */ - bool vinstr_active; + /* true if use of fallback model has been forced by the User */ + bool force_fallback_model; } ipa; #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_DEVFREQ */ - -#ifdef CONFIG_MALI_TRACE_TIMELINE - struct kbase_trace_kbdev_timeline timeline; -#endif - bool job_fault_debug; #ifdef CONFIG_DEBUG_FS @@ -1606,8 +1558,6 @@ struct kbase_device { #endif /* !MALI_CUSTOMER_RELEASE */ #endif /* CONFIG_DEBUG_FS */ - u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX]; - #if MALI_CUSTOMER_RELEASE == 0 int force_replay_limit; @@ -1652,6 +1602,12 @@ struct kbase_device { bool protected_mode_transition; + bool protected_mode_hwcnt_desired; + + bool protected_mode_hwcnt_disabled; + + struct work_struct protected_mode_hwcnt_disable_work; + bool protected_mode_support; #ifdef CONFIG_MALI_FPGA_BUS_LOGGER @@ -1669,12 +1625,13 @@ struct kbase_device { /* See KBASE_SERIALIZE_* for details */ u8 serialize_jobs; -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT u8 backup_serialize_jobs; #endif /* See KBASE_JS_*_PRIORITY_MODE for details. */ u32 js_ctx_scheduling_mode; + }; /** @@ -1778,6 +1735,7 @@ struct kbase_sub_alloc { DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); }; + /** * struct kbase_context - Object representing an entity, among which the GPU is * scheduled and gets its own GPU address space. @@ -1785,14 +1743,12 @@ struct kbase_sub_alloc { * @filp: Pointer to the struct file corresponding to device file * /dev/malixx instance, passed to the file's open method. * @kbdev: Pointer to the Kbase device for which the context is created. + * @mmu: Structure holding details of the MMU tables for this + * context * @id: Unique identifier for the context, indicates the number of * contexts which have been created for the device so far. * @api_version: contains the version number for User/kernel interface, * used for compatibility check. - * @pgd: Physical address of the page allocated for the top level - * page table of the context, this will be used for MMU Hw - * programming as the address translation will start from - * the top level page table. * @event_list: list of posted events about completed atoms, to be sent to * event handling thread of Userspace. * @event_coalesce_list: list containing events corresponding to successive atoms @@ -1816,9 +1772,6 @@ struct kbase_sub_alloc { * @api_version value 0. * @setup_in_progress: Indicates if the context's setup is in progress and other * setup calls during that shall be rejected. - * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries of - * top & intermediate level page tables to avoid repeated - * calls to kmap_atomic during the MMU teardown. * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations, * which can alias a number of memory regions.
The page * represents a region where it is mapped with a write-alloc @@ -1829,20 +1782,17 @@ struct kbase_sub_alloc { * @mem_partials: List head for the list of large pages, 2MB in size, which * have been split into 4 KB pages and are used * partially for the allocations >= 2 MB in size. - * @mmu_lock: Lock to serialize the accesses made to multi level GPU - * page tables, maintained for every context. * @reg_lock: Lock used for GPU virtual address space management operations, * like adding/freeing a memory region in the address space. * Can be converted to an rwlock? * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA * zone of the GPU virtual address space. Used for allocations * having the same value for GPU & CPU virtual address. - * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC - * zone of the GPU virtual address space. Used for - * allocations containing executable code for - * shader programs. * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA * zone of the GPU virtual address space. + * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA + * zone of the GPU virtual address space. Used for GPU-executable + * allocations which don't need the SAME_VA property. * @cookies: Bitmask of BITS_PER_LONG bits, used mainly for * SAME_VA allocations to defer the reservation of memory region * (from the GPU virtual address space) from base_mem_alloc @@ -1868,6 +1818,7 @@ struct kbase_sub_alloc { * @nonmapped_pages: Updated in the same way as @used_pages, except for the case * when the special tracking page is freed by userspace where it * is reset to 0. + * @permanent_mapped_pages: Usage count of permanently mapped memory * @mem_pool: Object containing the state for the context specific pool of * 4KB size physical pages. * @lp_mem_pool: Object containing the state for the context specific pool of @@ -1914,6 +1865,10 @@ struct kbase_sub_alloc { * pages used for GPU allocations, done for the context, * to the memory consumed by the process. * @same_va_end: End address of the SAME_VA zone (in 4KB page units) + * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units) + * or U64_MAX if the EXEC_VA zone is uninitialized. + * @gpu_va_end: End address of the GPU va space (in 4KB page units) + * @jit_va: Indicates if a JIT_VA zone has been created. * @timeline: Object tracking the number of atoms currently in flight for * the context and thread group id of the process, i.e. @tgid. * @mem_profile_data: Buffer containing the profiling information provided by @@ -1948,9 +1903,11 @@ struct kbase_sub_alloc { * @slots_pullable: Bitmask of slots, indicating the slots for which the * context has pullable atoms in the runnable tree. * @work: Work structure used for deferred ASID assignment. - * @vinstr_cli: Pointer to the legacy userspace vinstr client, there can - * be only such client per kbase context. - * @vinstr_cli_lock: Lock used for the vinstr ioctl calls made for @vinstr_cli. + * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters + * client, there can be only one such client per kbase + * context. + * @legacy_hwcnt_lock: Lock used to prevent concurrent access to + * @legacy_hwcnt_cli. * @completed_jobs: List containing completed atoms for which base_jd_event is * to be posted.
* @work_count: Number of work items, corresponding to atoms, currently @@ -2010,9 +1967,10 @@ struct kbase_sub_alloc { struct kbase_context { struct file *filp; struct kbase_device *kbdev; + struct kbase_mmu_table mmu; + u32 id; unsigned long api_version; - phys_addr_t pgd; struct list_head event_list; struct list_head event_coalesce_list; struct mutex event_mutex; @@ -2026,18 +1984,16 @@ struct kbase_context { atomic_t setup_complete; atomic_t setup_in_progress; - u64 *mmu_teardown_pages; - struct tagged_addr aliasing_sink_page; - struct mutex mem_partials_lock; + spinlock_t mem_partials_lock; struct list_head mem_partials; - struct mutex mmu_lock; struct mutex reg_lock; struct rb_root reg_rbtree_same; - struct rb_root reg_rbtree_exec; struct rb_root reg_rbtree_custom; + struct rb_root reg_rbtree_exec; + unsigned long cookies; struct kbase_va_region *pending_regions[BITS_PER_LONG]; @@ -2049,6 +2005,7 @@ struct kbase_context { struct kbase_jd_context jctx; atomic_t used_pages; atomic_t nonmapped_pages; + unsigned long permanent_mapped_pages; struct kbase_mem_pool mem_pool; struct kbase_mem_pool lp_mem_pool; @@ -2069,6 +2026,7 @@ struct kbase_context { atomic_t refcount; + /* NOTE: * * Flags are in jctx.sched_info.ctx.flags @@ -2076,12 +2034,12 @@ struct kbase_context { * * All other flags must be added there */ spinlock_t mm_update_lock; - struct mm_struct *process_mm; + struct mm_struct __rcu *process_mm; u64 same_va_end; + u64 exec_va_start; + u64 gpu_va_end; + bool jit_va; -#ifdef CONFIG_MALI_TRACE_TIMELINE - struct kbase_trace_kctx_timeline timeline; -#endif #ifdef CONFIG_DEBUG_FS char *mem_profile_data; size_t mem_profile_size; @@ -2108,8 +2066,8 @@ struct kbase_context { struct work_struct work; - struct kbase_vinstr_client *vinstr_cli; - struct mutex vinstr_cli_lock; + struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; + struct mutex legacy_hwcnt_lock; struct list_head completed_jobs; atomic_t work_count; @@ -2138,7 +2096,7 @@ struct kbase_context { u8 trim_level; -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT bool gwt_enabled; bool gwt_was_enabled; @@ -2152,7 +2110,7 @@ struct kbase_context { s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; }; -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT /** * struct kbasep_gwt_list_element - Structure used to collect GPU * write faults. diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index e58e27c793f3..530bb45c8ec0 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -39,8 +39,6 @@ #include #include -#include - /* NOTE: Magic - 0x45435254 (TRCE in ASCII). * Supports tracing feature provided in the base module. * Please keep it in sync with the value of base module. 
@@ -81,7 +79,8 @@ static int kbase_device_as_init(struct kbase_device *kbdev, int i) snprintf(name, sizeof(name), format, i); kbdev->as[i].number = i; - kbdev->as[i].fault_addr = 0ULL; + kbdev->as[i].bf_data.addr = 0ULL; + kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1); if (!kbdev->as[i].pf_wq) @@ -148,7 +147,7 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev) int kbase_device_init(struct kbase_device * const kbdev) { - int i, err; + int err; #ifdef CONFIG_ARM64 struct device_node *np = NULL; #endif /* CONFIG_ARM64 */ @@ -223,19 +222,7 @@ int kbase_device_init(struct kbase_device * const kbdev) if (err) goto term_as; - mutex_init(&kbdev->cacheclean_lock); - -#ifdef CONFIG_MALI_TRACE_TIMELINE - for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) - kbdev->timeline.slot_atoms_submitted[i] = 0; - - for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i) - atomic_set(&kbdev->timeline.pm_event_uid[i], 0); -#endif /* CONFIG_MALI_TRACE_TIMELINE */ - - /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */ - for (i = 0; i < FBDUMP_CONTROL_MAX; i++) - kbdev->kbase_profiling_controls[i] = 0; + init_waitqueue_head(&kbdev->cache_clean_wait); kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); @@ -285,91 +272,6 @@ void kbase_device_free(struct kbase_device *kbdev) kfree(kbdev); } -int kbase_device_trace_buffer_install( - struct kbase_context *kctx, u32 *tb, size_t size) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(tb); - - /* Interface uses 16-bit value to track last accessed entry. Each entry - * is composed of two 32-bit words. - * This limits the size that can be handled without an overflow. */ - if (0xFFFF * (2 * sizeof(u32)) < size) - return -EINVAL; - - /* set up the header */ - /* magic number in the first 4 bytes */ - tb[0] = TRACE_BUFFER_HEADER_SPECIAL; - /* Store (write offset = 0, wrap counter = 0, transaction active = no) - * write offset 0 means never written. - * Offsets 1 to (wrap_offset - 1) used to store values when trace started - */ - tb[1] = 0; - - /* install trace buffer */ - spin_lock_irqsave(&kctx->jctx.tb_lock, flags); - kctx->jctx.tb_wrap_offset = size / 8; - kctx->jctx.tb = tb; - spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); - - return 0; -} - -void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(kctx); - spin_lock_irqsave(&kctx->jctx.tb_lock, flags); - kctx->jctx.tb = NULL; - kctx->jctx.tb_wrap_offset = 0; - spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); -} - -void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value) -{ - unsigned long flags; - - spin_lock_irqsave(&kctx->jctx.tb_lock, flags); - if (kctx->jctx.tb) { - u16 wrap_count; - u16 write_offset; - u32 *tb = kctx->jctx.tb; - u32 header_word; - - header_word = tb[1]; - KBASE_DEBUG_ASSERT(0 == (header_word & 0x1)); - - wrap_count = (header_word >> 1) & 0x7FFF; - write_offset = (header_word >> 16) & 0xFFFF; - - /* mark as transaction in progress */ - tb[1] |= 0x1; - mb(); - - /* calculate new offset */ - write_offset++; - if (write_offset == kctx->jctx.tb_wrap_offset) { - /* wrap */ - write_offset = 1; - wrap_count++; - wrap_count &= 0x7FFF; /* 15bit wrap counter */ - } - - /* store the trace entry at the selected offset */ - tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 
0x1 : 0x0); - tb[write_offset * 2 + 1] = reg_value; - mb(); - - /* new header word */ - header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */ - tb[1] = header_word; - } - spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); -} - /* * Device trace functions */ @@ -636,39 +538,3 @@ void kbasep_trace_dump(struct kbase_device *kbdev) CSTD_UNUSED(kbdev); } #endif /* KBASE_TRACE_ENABLE */ - -void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value) -{ - switch (control) { - case FBDUMP_CONTROL_ENABLE: - /* fall through */ - case FBDUMP_CONTROL_RATE: - /* fall through */ - case SW_COUNTER_ENABLE: - /* fall through */ - case FBDUMP_CONTROL_RESIZE_FACTOR: - kbdev->kbase_profiling_controls[control] = value; - break; - default: - dev_err(kbdev->dev, "Profiling control %d not found\n", control); - break; - } -} - -/* - * Called by gator to control the production of - * profiling information at runtime - * */ - -void _mali_profiling_control(u32 action, u32 value) -{ - struct kbase_device *kbdev = NULL; - - /* find the first i.e. call with -1 */ - kbdev = kbase_find_device(-1); - - if (NULL != kbdev) - kbase_set_profiling_control(kbdev, action, value); -} -KBASE_EXPORT_SYMBOL(_mali_profiling_control); - diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index e290fceea6cd..3c9cef364134 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,8 +38,6 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru data = katom->udata; - KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); - KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx); KBASE_TLSTREAM_TL_DEL_ATOM(katom); diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.c b/drivers/gpu/arm/midgard/mali_kbase_fence.c index ac8272c900bb..3272836efad8 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_fence.c +++ b/drivers/gpu/arm/midgard/mali_kbase_fence.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -177,6 +177,7 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom, kbase_fence_cb->fence = fence; kbase_fence_cb->katom = katom; INIT_LIST_HEAD(&kbase_fence_cb->node); + atomic_inc(&katom->dma_fence.dep_count); err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, callback); @@ -189,15 +190,16 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom, err = 0; kfree(kbase_fence_cb); + atomic_dec(&katom->dma_fence.dep_count); } else if (err) { kfree(kbase_fence_cb); + atomic_dec(&katom->dma_fence.dep_count); } else { /* * Get reference to fence that will be kept until callback gets * cleaned up in kbase_fence_free_callbacks(). 
*/ dma_fence_get(fence); - atomic_inc(&katom->dma_fence.dep_count); /* Add callback to katom's list of callbacks */ list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index 040b2096bec6..1719edf1e978 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,9 @@ #include "mali_kbase_mem_linux.h" #include "mali_kbase_gator_api.h" #include "mali_kbase_gator_hwcnt_names.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_virtualizer.h" #define MALI_MAX_CORES_PER_GROUP 4 #define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 @@ -33,8 +36,9 @@ struct kbase_gator_hwcnt_handles { struct kbase_device *kbdev; - struct kbase_vinstr_client *vinstr_cli; - void *vinstr_buffer; + struct kbase_hwcnt_virtualizer_client *hvcli; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer dump_buf; struct work_struct dump_work; int dump_complete; spinlock_t dump_lock; @@ -73,10 +77,26 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) hardware_counters = hardware_counters_mali_tSIx; count = ARRAY_SIZE(hardware_counters_mali_tSIx); break; + case GPU_ID2_PRODUCT_TDVX: + hardware_counters = hardware_counters_mali_tSIx; + count = ARRAY_SIZE(hardware_counters_mali_tSIx); + break; case GPU_ID2_PRODUCT_TNOX: hardware_counters = hardware_counters_mali_tNOx; count = ARRAY_SIZE(hardware_counters_mali_tNOx); break; + case GPU_ID2_PRODUCT_TGOX: + hardware_counters = hardware_counters_mali_tGOx; + count = ARRAY_SIZE(hardware_counters_mali_tGOx); + break; + case GPU_ID2_PRODUCT_TKAX: + hardware_counters = hardware_counters_mali_tKAx; + count = ARRAY_SIZE(hardware_counters_mali_tKAx); + break; + case GPU_ID2_PRODUCT_TTRX: + hardware_counters = hardware_counters_mali_tTRx; + count = ARRAY_SIZE(hardware_counters_mali_tTRx); + break; default: hardware_counters = NULL; count = 0; @@ -157,8 +177,10 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) { + int errcode; struct kbase_gator_hwcnt_handles *hand; - struct kbase_ioctl_hwcnt_reader_setup setup; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_physical_enable_map phys_map; uint32_t dump_size = 0, i = 0; if (!in_out_info) @@ -176,11 +198,20 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn if (!hand->kbdev) goto free_hand; - dump_size = kbase_vinstr_dump_size(hand->kbdev); - hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); - if (!hand->vinstr_buffer) + metadata = kbase_hwcnt_virtualizer_metadata( + hand->kbdev->hwcnt_gpu_virt); + if (!metadata) + goto release_device; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hand->enable_map); + if (errcode) goto release_device; - in_out_info->kernel_dump_buffer = hand->vinstr_buffer; + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hand->dump_buf); + if (errcode) + goto free_enable_map; + + in_out_info->kernel_dump_buffer = hand->dump_buf.dump_buf; in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; 
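/*
 * Illustrative sketch, not part of this patch: the vinstr client used by the
 * gator interface is replaced above by a hwcnt virtualizer client. Condensed,
 * the new setup and dump flow looks roughly like the fragment below; error
 * handling and the layout bookkeeping are omitted, and the all-ones bitmasks
 * are placeholders for the values gator passes in in_out_info->bitmask[].
 */
const struct kbase_hwcnt_metadata *md =
	kbase_hwcnt_virtualizer_metadata(kbdev->hwcnt_gpu_virt);
struct kbase_hwcnt_physical_enable_map phys = {
	.jm_bm = ~0u, .tiler_bm = ~0u,		/* placeholder: enable everything */
	.shader_bm = ~0u, .mmu_l2_bm = ~0u,
};
struct kbase_hwcnt_enable_map map;
struct kbase_hwcnt_dump_buffer buf;
struct kbase_hwcnt_virtualizer_client *cli;
u64 ts_start_ns, ts_end_ns;

kbase_hwcnt_enable_map_alloc(md, &map);		/* return checks omitted */
kbase_hwcnt_dump_buffer_alloc(md, &buf);
kbase_hwcnt_gpu_enable_map_from_physical(&map, &phys);
kbase_hwcnt_virtualizer_client_create(kbdev->hwcnt_gpu_virt, &map, &cli);
/* per dump, as in dump_worker() further down: */
kbase_hwcnt_virtualizer_client_dump(cli, &ts_start_ns, &ts_end_ns, &buf);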
in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; @@ -197,7 +228,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->nr_core_groups, GFP_KERNEL); if (!in_out_info->hwc_layout) - goto free_vinstr_buffer; + goto free_dump_buf; dump_size = in_out_info->nr_core_groups * MALI_MAX_NUM_BLOCKS_PER_GROUP * @@ -240,7 +271,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) - goto free_vinstr_buffer; + goto free_dump_buf; dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; @@ -259,17 +290,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } } + /* Calculated dump size must be the same as real dump size */ + if (WARN_ON(dump_size != metadata->dump_buf_bytes)) + goto free_layout; + in_out_info->nr_hwc_blocks = i; in_out_info->size = dump_size; - setup.jm_bm = in_out_info->bitmask[0]; - setup.tiler_bm = in_out_info->bitmask[1]; - setup.shader_bm = in_out_info->bitmask[2]; - setup.mmu_l2_bm = in_out_info->bitmask[3]; - hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, - &setup, hand->vinstr_buffer); - if (!hand->vinstr_cli) { - dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); + phys_map.jm_bm = in_out_info->bitmask[JM_BLOCK]; + phys_map.tiler_bm = in_out_info->bitmask[TILER_BLOCK]; + phys_map.shader_bm = in_out_info->bitmask[SHADER_BLOCK]; + phys_map.mmu_l2_bm = in_out_info->bitmask[MMU_L2_BLOCK]; + kbase_hwcnt_gpu_enable_map_from_physical(&hand->enable_map, &phys_map); + errcode = kbase_hwcnt_virtualizer_client_create( + hand->kbdev->hwcnt_gpu_virt, &hand->enable_map, &hand->hvcli); + if (errcode) { + dev_err(hand->kbdev->dev, + "Failed to register gator with hwcnt virtualizer core"); goto free_layout; } @@ -277,13 +314,12 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn free_layout: kfree(in_out_info->hwc_layout); - -free_vinstr_buffer: - kfree(hand->vinstr_buffer); - +free_dump_buf: + kbase_hwcnt_dump_buffer_free(&hand->dump_buf); +free_enable_map: + kbase_hwcnt_enable_map_free(&hand->enable_map); release_device: kbase_release_device(hand->kbdev); - free_hand: kfree(hand); return NULL; @@ -297,8 +333,9 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k if (opaque_handles) { cancel_work_sync(&opaque_handles->dump_work); - kbase_vinstr_detach_client(opaque_handles->vinstr_cli); - kfree(opaque_handles->vinstr_buffer); + kbase_hwcnt_virtualizer_client_destroy(opaque_handles->hvcli); + kbase_hwcnt_dump_buffer_free(&opaque_handles->dump_buf); + kbase_hwcnt_enable_map_free(&opaque_handles->enable_map); kbase_release_device(opaque_handles->kbdev); kfree(opaque_handles); } @@ -307,11 +344,21 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); static void dump_worker(struct work_struct *work) { + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; struct kbase_gator_hwcnt_handles *hand; hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); - if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL)) { + errcode = kbase_hwcnt_virtualizer_client_dump( + hand->hvcli, &ts_start_ns, &ts_end_ns, &hand->dump_buf); + if (!errcode) { + /* Patch the header to hide other client's counter choices */ + kbase_hwcnt_gpu_patch_dump_headers( + &hand->dump_buf, &hand->enable_map); + /* 
Zero all non-enabled counters (currently undefined values) */ + kbase_hwcnt_dump_buffer_zero_non_enabled( + &hand->dump_buf, &hand->enable_map); spin_lock_bh(&hand->dump_lock); hand->dump_complete = 1; spin_unlock_bh(&hand->dump_lock); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h index a17870d03b21..c1e315b0f534 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names_ttrx.h @@ -116,10 +116,10 @@ static const char * const hardware_counters_mali_tTRx[] = { "", "TTRx_BUS_WRITE", "TTRx_LOADING_DESC", - "", - "", - "", - "", + "TTRx_IDVS_POS_SHAD_REQ", + "TTRx_IDVS_POS_SHAD_WAIT", + "TTRx_IDVS_POS_SHAD_STALL", + "TTRx_IDVS_POS_FIFO_FULL", "TTRx_PREFETCH_STALL", "TTRx_VCACHE_HIT", "TTRx_VCACHE_MISS", @@ -129,11 +129,11 @@ static const char * const hardware_counters_mali_tTRx[] = { "TTRx_VFETCH_STALL", "TTRx_PRIMASSY_STALL", "TTRx_BBOX_GEN_STALL", - "", - "", - "", - "", - "", + "TTRx_IDVS_VBU_HIT", + "TTRx_IDVS_VBU_MISS", + "TTRx_IDVS_VBU_LINE_DEALLOCATE", + "TTRx_IDVS_VAR_SHAD_REQ", + "TTRx_IDVS_VAR_SHAD_STALL", "TTRx_BINNER_STALL", "TTRx_ITER_STALL", "TTRx_COMPRESS_MISS", @@ -196,15 +196,15 @@ static const char * const hardware_counters_mali_tTRx[] = { "TTRx_EXEC_ICACHE_MISS", "TTRx_EXEC_STARVE_ARITH", "TTRx_CALL_BLEND_SHADER", - "TTRx_TEX_INSTR", - "TTRx_TEX_INSTR_MIPMAP", - "TTRx_TEX_INSTR_COMPRESSED", - "TTRx_TEX_INSTR_3D", - "TTRx_TEX_INSTR_TRILINEAR", - "TTRx_TEX_COORD_ISSUE", - "TTRx_TEX_COORD_STALL", - "TTRx_TEX_STARVE_CACHE", - "TTRx_TEX_STARVE_FILTER", + "TTRx_TEX_MSGI_NUM_QUADS", + "TTRx_TEX_DFCH_NUM_PASSES", + "TTRx_TEX_DFCH_NUM_PASSES_MISS", + "TTRx_TEX_DFCH_NUM_PASSES_MIP_MAP", + "TTRx_TEX_TIDX_NUM_SPLIT_MIP_MAP", + "TTRx_TEX_TFCH_NUM_LINES_FETCHED", + "TTRx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", + "TTRx_TEX_TFCH_NUM_OPERATIONS", + "TTRx_TEX_FILT_NUM_OPERATIONS", "TTRx_LS_MEM_READ_FULL", "TTRx_LS_MEM_READ_SHORT", "TTRx_LS_MEM_WRITE_FULL", @@ -222,9 +222,9 @@ static const char * const hardware_counters_mali_tTRx[] = { "TTRx_BEATS_RD_TEX", "TTRx_BEATS_RD_TEX_EXT", "TTRx_BEATS_RD_OTHER", - "", + "TTRx_BEATS_WR_LSC_OTHER", "TTRx_BEATS_WR_TIB", - "TTRx_BEATS_WR_LSC", + "TTRx_BEATS_WR_LSC_WB", /* Performance counters for the Memory System */ "", diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h index 218e63a61c6c..d432f8e056c9 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -110,8 +110,14 @@ #define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) #define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) #define GPU_ID2_PRODUCT_TKAX GPU_ID2_MODEL_MAKE(8, 0) -#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) #define GPU_ID2_PRODUCT_TBOX GPU_ID2_MODEL_MAKE(8, 2) +#define GPU_ID2_PRODUCT_TEGX GPU_ID2_MODEL_MAKE(8, 3) +#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) +#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) +#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) +#define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0) +#define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3) +#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 diff --git a/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/drivers/gpu/arm/midgard/mali_kbase_gwt.c index b36254641327..0481f80fec75 100755 --- 
a/drivers/gpu/arm/midgard/mali_kbase_gwt.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gwt.c @@ -54,8 +54,6 @@ static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, { kbase_gpu_gwt_setup_page_permission(kctx, flag, rb_first(&(kctx->reg_rbtree_same))); - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_exec))); kbase_gpu_gwt_setup_page_permission(kctx, flag, rb_first(&(kctx->reg_rbtree_custom))); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index f34f53a919b8..450926c3d35f 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -65,12 +65,30 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TKAX: features = base_hw_features_tKAx; break; + case GPU_ID2_PRODUCT_TEGX: + features = base_hw_features_tEGx; + break; case GPU_ID2_PRODUCT_TTRX: features = base_hw_features_tTRx; break; + case GPU_ID2_PRODUCT_TNAX: + features = base_hw_features_tNAx; + break; + case GPU_ID2_PRODUCT_TBEX: + features = base_hw_features_tBEx; + break; + case GPU_ID2_PRODUCT_TULX: + features = base_hw_features_tULx; + break; case GPU_ID2_PRODUCT_TBOX: features = base_hw_features_tBOx; break; + case GPU_ID2_PRODUCT_TIDX: + features = base_hw_features_tIDx; + break; + case GPU_ID2_PRODUCT_TVAX: + features = base_hw_features_tVAx; + break; default: features = base_hw_features_generic; break; @@ -183,13 +201,37 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0}, {U32_MAX, NULL} } }, + {GPU_ID2_PRODUCT_TEGX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tEGx_r0p0}, + {U32_MAX, NULL} } }, + {GPU_ID2_PRODUCT_TTRX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, {U32_MAX, NULL} } }, + {GPU_ID2_PRODUCT_TNAX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TBEX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TULX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0}, + {U32_MAX, NULL} } }, + {GPU_ID2_PRODUCT_TBOX, {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0}, {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TIDX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tIDx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TVAX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, + {U32_MAX, NULL} } }, }; u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; @@ -208,10 +250,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( if (product != NULL) { /* Found a matching product. */ const u32 version = gpu_id & GPU_ID2_VERSION; -#if !MALI_CUSTOMER_RELEASE u32 fallback_version = 0; const enum base_hw_issue *fallback_issues = NULL; -#endif size_t v; /* Stop when we reach the end of the map. */ @@ -223,25 +263,34 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( break; } -#if !MALI_CUSTOMER_RELEASE /* Check whether this is a candidate for most recent known version not later than the actual version. 
*/ if ((version > product->map[v].version) && (product->map[v].version >= fallback_version)) { - fallback_version = product->map[v].version; - fallback_issues = product->map[v].issues; - } +#if MALI_CUSTOMER_RELEASE + /* Match on version's major and minor fields */ + if (((version ^ product->map[v].version) >> + GPU_ID2_VERSION_MINOR_SHIFT) == 0) #endif + { + fallback_version = product->map[v].version; + fallback_issues = product->map[v].issues; + } + } } -#if !MALI_CUSTOMER_RELEASE if ((issues == NULL) && (fallback_issues != NULL)) { /* Fall back to the issue set of the most recent known version not later than the actual version. */ issues = fallback_issues; +#if MALI_CUSTOMER_RELEASE + dev_warn(kbdev->dev, + "GPU hardware issue table may need updating:\n" +#else dev_info(kbdev->dev, +#endif "r%dp%d status %d is unknown; treating as r%dp%d status %d", (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT, @@ -263,7 +312,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( kbase_gpuprops_update_core_props_gpu_id( &kbdev->gpu_props.props); } -#endif } return issues; } @@ -416,12 +464,30 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TKAX: issues = base_hw_issues_model_tKAx; break; + case GPU_ID2_PRODUCT_TEGX: + issues = base_hw_issues_model_tEGx; + break; case GPU_ID2_PRODUCT_TTRX: issues = base_hw_issues_model_tTRx; break; + case GPU_ID2_PRODUCT_TNAX: + issues = base_hw_issues_model_tNAx; + break; + case GPU_ID2_PRODUCT_TBEX: + issues = base_hw_issues_model_tBEx; + break; + case GPU_ID2_PRODUCT_TULX: + issues = base_hw_issues_model_tULx; + break; case GPU_ID2_PRODUCT_TBOX: issues = base_hw_issues_model_tBOx; break; + case GPU_ID2_PRODUCT_TIDX: + issues = base_hw_issues_model_tIDx; + break; + case GPU_ID2_PRODUCT_TVAX: + issues = base_hw_issues_model_tVAx; + break; default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h index 0c5ceffb0e47..d5b90994790b 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,28 @@ #include /** - * kbase_instr_hwcnt_enable_internal - Enable HW counters collection + * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. + * @dump_buffer: GPU address to write counters to. + * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. + * @jm_bm: counters selection bitmask (JM). + * @shader_bm: counters selection bitmask (Shader). + * @tiler_bm: counters selection bitmask (Tiler). + * @mmu_l2_bm: counters selection bitmask (MMU_L2). + * @use_secondary: use secondary performance counters set for applicable + * counter blocks. 
+ */ +struct kbase_instr_hwcnt_enable { + u64 dump_buffer; + u64 dump_buffer_bytes; + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + bool use_secondary; +}; + +/** + * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection * @kbdev: Kbase device * @kctx: Kbase context * @enable: HW counter setup parameters @@ -43,10 +64,10 @@ */ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_enable *enable); + struct kbase_instr_hwcnt_enable *enable); /** - * kbase_instr_hwcnt_disable_internal - Disable HW counters collection + * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection * @kctx: Kbase context * * Context: might sleep, waiting for an ongoing dump to complete diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index ea87913a7a39..e2798eb09aea 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -128,7 +128,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, struct kbase_context *kctx); /** - * kbase_backend_cacheclean - Perform a cache clean if the given atom requires + * kbase_backend_cache_clean - Perform a cache clean if the given atom requires * one * @kbdev: Device pointer * @katom: Pointer to the failed atom @@ -136,7 +136,7 @@ void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, * On some GPUs, the GPU cache must be cleaned following a failed atom. This * function performs a clean if it is required by @katom. */ -void kbase_backend_cacheclean(struct kbase_device *kbdev, +void kbase_backend_cache_clean(struct kbase_device *kbdev, struct kbase_jd_atom *katom); @@ -160,15 +160,12 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, * any scheduling has taken place. * @kbdev: Device pointer * @core_req: Core requirements of atom - * @affinity: Affinity of atom - * @coreref_state: Coreref state of atom * * This function should only be called from kbase_jd_done_worker() or * js_return_worker(). */ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req, u64 affinity, - enum kbase_atom_coreref_state coreref_state); + base_jd_core_req core_req); /** * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU @@ -178,17 +175,6 @@ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, */ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); -/** - * kbase_backend_inspect_head() - Return the atom currently at the head of slot - * @js - * @kbdev: Device pointer - * @js: Job slot to inspect - * - * Return : Atom currently at the head of slot @js, or NULL - */ -struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, - int js); - /** * kbase_backend_inspect_tail - Return the atom currently at the tail of slot * @js @@ -289,7 +275,6 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); */ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); -#if KBASE_GPU_RESET_EN /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. * @kbdev: Device pointer @@ -357,8 +342,11 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev); * of the GPU as part of normal processing (e.g. exiting protected mode) where * the driver will have ensured the scheduler has been idled and all other * users of the GPU (e.g. instrumentation) have been suspended. 
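/*
 * Illustrative sketch, not part of this patch: kbase_instr_hwcnt_enable_internal()
 * now takes the new kbase_instr_hwcnt_enable structure instead of the ioctl
 * struct. A caller might fill it in as below; the GPU virtual address, the
 * buffer size and the all-ones bitmasks are placeholders, not values taken
 * from the driver.
 */
struct kbase_instr_hwcnt_enable enable = {
	.dump_buffer = gpu_dump_va,		/* placeholder: GPU VA of the dump buffer */
	.dump_buffer_bytes = dump_bytes,	/* placeholder: size of that buffer */
	.jm_bm = ~0u,
	.shader_bm = ~0u,
	.tiler_bm = ~0u,
	.mmu_l2_bm = ~0u,
	.use_secondary = false,
};
int err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);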
+ * + * Return: 0 if the reset was started successfully + * -EAGAIN if another reset is currently in progress */ -void kbase_reset_gpu_silent(struct kbase_device *kbdev); +int kbase_reset_gpu_silent(struct kbase_device *kbdev); /** * kbase_reset_gpu_active - Reports if the GPU is being reset @@ -367,7 +355,6 @@ void kbase_reset_gpu_silent(struct kbase_device *kbdev); * Return: True if the GPU is in the process of being reset. */ bool kbase_reset_gpu_active(struct kbase_device *kbdev); -#endif /** * kbase_job_slot_hardstop - Hard-stop the specified job slot diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h index 4598d8099765..5bb38872f4e7 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,13 +44,23 @@ struct kbase_device; * * Must be called before any other power management function * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the power management framework was successfully initialized. + */ +int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev); + +/** + * Initialize the power management framework. + * + * Must be called before any other power management function (except + * @ref kbase_hwaccess_pm_early_init) + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * @return 0 if the power management framework was successfully - * initialized. + * Return: 0 if the power management framework was successfully initialized. */ -int kbase_hwaccess_pm_init(struct kbase_device *kbdev); +int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev); /** * Terminate the power management framework. @@ -58,10 +68,19 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev); * No power management functions may be called after this (except * @ref kbase_pm_init) * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev); + +/** + * Terminate the power management framework. + * + * No power management functions may be called after this (except + * @ref kbase_hwaccess_pm_early_term or @ref kbase_hwaccess_pm_late_init) + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ -void kbase_hwaccess_pm_term(struct kbase_device *kbdev); +void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev); /** * kbase_hwaccess_pm_powerup - Power up the GPU. diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h index 7f64936f011e..f7539f5b46c6 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,7 +41,7 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, /** * kbase_wait_write_flush() - Wait for GPU write flush - * @kctx: Context pointer + * @kbdev: Kbase device * * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush * its write buffer. @@ -51,8 +51,12 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, * * This function is only in use for BASE_HW_ISSUE_6367 */ -#ifndef CONFIG_MALI_NO_MALI -void kbase_wait_write_flush(struct kbase_context *kctx); +#ifdef CONFIG_MALI_NO_MALI +static inline void kbase_wait_write_flush(struct kbase_device *kbdev) +{ +} +#else +void kbase_wait_write_flush(struct kbase_device *kbdev); #endif #endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c new file mode 100644 index 000000000000..efbac6fc8cd6 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c @@ -0,0 +1,796 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of hardware counter context and accumulator APIs. + */ + +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_backend.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include +#include +#include + +/** + * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. + * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. + * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. + * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are + * any enabled counters. + */ +enum kbase_hwcnt_accum_state { + ACCUM_STATE_ERROR, + ACCUM_STATE_DISABLED, + ACCUM_STATE_ENABLED +}; + +/** + * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. + * @backend: Pointer to created counter backend. + * @state: The current state of the accumulator. + * - State transition from disabled->enabled or + * disabled->error requires state_lock. + * - State transition from enabled->disabled or + * enabled->error requires both accum_lock and + * state_lock. + * - Error state persists until next disable. + * @enable_map: The current set of enabled counters. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map_any_enabled. + * @enable_map_any_enabled: True if any counters in the map are enabled, else + * false. 
If true, and state is ACCUM_STATE_ENABLED, + * then the counter backend will be enabled. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map. + * @scratch_map: Scratch enable map, used as temporary enable map + * storage during dumps. + * - Must only be read or modified while holding + * accum_lock. + * @accum_buf: Accumulation buffer, where dumps will be accumulated + * into on transition to a disable state. + * - Must only be read or modified while holding + * accum_lock. + * @accumulated: True if the accumulation buffer has been accumulated + * into and not subsequently read from yet, else false. + * - Must only be read or modified while holding + * accum_lock. + * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent + * dump that was requested by the user. + * - Must only be read or modified while holding + * accum_lock. + */ +struct kbase_hwcnt_accumulator { + struct kbase_hwcnt_backend *backend; + enum kbase_hwcnt_accum_state state; + struct kbase_hwcnt_enable_map enable_map; + bool enable_map_any_enabled; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool accumulated; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_context - Hardware counter context structure. + * @iface: Pointer to hardware counter backend interface. + * @state_lock: Spinlock protecting state. + * @disable_count: Disable count of the context. Initialised to 1. + * Decremented when the accumulator is acquired, and incremented + * on release. Incremented on calls to + * kbase_hwcnt_context_disable[_atomic], and decremented on + * calls to kbase_hwcnt_context_enable. + * - Must only be read or modified while holding state_lock. + * @accum_lock: Mutex protecting accumulator. + * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or + * termination. Set to true before accumulator initialisation, + * and false after accumulator termination. + * - Must only be modified while holding both accum_lock and + * state_lock. + * - Can be read while holding either lock. + * @accum: Hardware counter accumulator structure. + */ +struct kbase_hwcnt_context { + const struct kbase_hwcnt_backend_interface *iface; + spinlock_t state_lock; + size_t disable_count; + struct mutex accum_lock; + bool accum_inited; + struct kbase_hwcnt_accumulator accum; +}; + +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx) +{ + struct kbase_hwcnt_context *hctx = NULL; + + if (!iface || !out_hctx) + return -EINVAL; + + hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); + if (!hctx) + return -ENOMEM; + + hctx->iface = iface; + spin_lock_init(&hctx->state_lock); + hctx->disable_count = 1; + mutex_init(&hctx->accum_lock); + hctx->accum_inited = false; + + *out_hctx = hctx; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); + +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return; + + /* Make sure we didn't leak the accumulator */ + WARN_ON(hctx->accum_inited); + kfree(hctx); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); + +/** + * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. 
+ */ +static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) +{ + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); + kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); + kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); + hctx->iface->term(hctx->accum.backend); + memset(&hctx->accum, 0, sizeof(hctx->accum)); +} + +/** + * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) +{ + int errcode; + + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + errcode = hctx->iface->init( + hctx->iface->info, &hctx->accum.backend); + if (errcode) + goto error; + + hctx->accum.state = ACCUM_STATE_ERROR; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.enable_map); + if (errcode) + goto error; + + hctx->accum.enable_map_any_enabled = false; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hctx->iface->metadata, &hctx->accum.accum_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.scratch_map); + if (errcode) + goto error; + + hctx->accum.accumulated = false; + + hctx->accum.ts_last_dump_ns = + hctx->iface->timestamp_ns(hctx->accum.backend); + + return 0; + +error: + kbasep_hwcnt_accumulator_term(hctx); + return errcode; +} + +/** + * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the + * disabled state, from the enabled or + * error states. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. + */ +static void kbasep_hwcnt_accumulator_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + int errcode = 0; + bool backend_enabled = false; + struct kbase_hwcnt_accumulator *accum; + unsigned long flags; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + WARN_ON(!hctx->accum_inited); + + accum = &hctx->accum; + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count != 0); + WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); + + if ((hctx->accum.state == ACCUM_STATE_ENABLED) && + (accum->enable_map_any_enabled)) + backend_enabled = true; + + if (!backend_enabled) + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Early out if the backend is not already enabled */ + if (!backend_enabled) + return; + + if (!accumulate) + goto disable; + + /* Try and accumulate before disabling */ + errcode = hctx->iface->dump_request(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_get(accum->backend, + &accum->accum_buf, &accum->enable_map, accum->accumulated); + if (errcode) + goto disable; + + accum->accumulated = true; + +disable: + hctx->iface->dump_disable(accum->backend); + + /* Regardless of any errors during the accumulate, put the accumulator + * in the disabled state. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} + +/** + * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the + * enabled state, from the disabled state. + * @hctx: Non-NULL pointer to hardware counter context. 
+ */ +static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) +{ + int errcode = 0; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->state_lock); + WARN_ON(!hctx->accum_inited); + WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); + + accum = &hctx->accum; + + /* The backend only needs enabling if any counters are enabled */ + if (accum->enable_map_any_enabled) + errcode = hctx->iface->dump_enable_nolock( + accum->backend, &accum->enable_map); + + if (!errcode) + accum->state = ACCUM_STATE_ENABLED; + else + accum->state = ACCUM_STATE_ERROR; +} + +/** + * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date + * values of enabled counters possible, and + * optionally update the set of enabled + * counters. + * @hctx : Non-NULL pointer to the hardware counter context + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * @new_map: Pointer to the new counter enable map. If non-NULL, must have + * the same metadata as the accumulator. If NULL, the set of + * enabled counters will be unchanged. + */ +static int kbasep_hwcnt_accumulator_dump( + struct kbase_hwcnt_context *hctx, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf, + const struct kbase_hwcnt_enable_map *new_map) +{ + int errcode = 0; + unsigned long flags; + enum kbase_hwcnt_accum_state state; + bool dump_requested = false; + bool dump_written = false; + bool cur_map_any_enabled; + struct kbase_hwcnt_enable_map *cur_map; + bool new_map_any_enabled = false; + u64 dump_time_ns; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata)); + WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata)); + WARN_ON(!hctx->accum_inited); + lockdep_assert_held(&hctx->accum_lock); + + accum = &hctx->accum; + cur_map = &accum->scratch_map; + + /* Save out info about the current enable map */ + cur_map_any_enabled = accum->enable_map_any_enabled; + kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); + + if (new_map) + new_map_any_enabled = + kbase_hwcnt_enable_map_any_enabled(new_map); + + /* + * We're holding accum_lock, so the accumulator state might transition + * from disabled to enabled during this function (as enabling is lock + * free), but it will never disable (as disabling needs to hold the + * accum_lock), nor will it ever transition from enabled to error (as + * an enable while we're already enabled is impossible). + * + * If we're already disabled, we'll only look at the accumulation buffer + * rather than do a real dump, so a concurrent enable does not affect + * us. + * + * If a concurrent enable fails, we might transition to the error + * state, but again, as we're only looking at the accumulation buffer, + * it's not an issue. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + state = accum->state; + + /* + * Update the new map now, such that if an enable occurs during this + * dump then that enable will set the new map. If we're already enabled, + * then we'll do it ourselves after the dump. 
+ */ + if (new_map) { + kbase_hwcnt_enable_map_copy( + &accum->enable_map, new_map); + accum->enable_map_any_enabled = new_map_any_enabled; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Error state, so early out. No need to roll back any map updates */ + if (state == ACCUM_STATE_ERROR) + return -EIO; + + /* Initiate the dump if the backend is enabled. */ + if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { + /* Disable pre-emption, to make the timestamp as accurate as + * possible. + */ + preempt_disable(); + { + dump_time_ns = hctx->iface->timestamp_ns( + accum->backend); + if (dump_buf) { + errcode = hctx->iface->dump_request( + accum->backend); + dump_requested = true; + } else { + errcode = hctx->iface->dump_clear( + accum->backend); + } + } + preempt_enable(); + if (errcode) + goto error; + } else { + dump_time_ns = hctx->iface->timestamp_ns(accum->backend); + } + + /* Copy any accumulation into the dest buffer */ + if (accum->accumulated && dump_buf) { + kbase_hwcnt_dump_buffer_copy( + dump_buf, &accum->accum_buf, cur_map); + dump_written = true; + } + + /* Wait for any requested dumps to complete */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto error; + } + + /* If we're enabled and there's a new enable map, change the enabled set + * as soon after the dump has completed as possible. + */ + if ((state == ACCUM_STATE_ENABLED) && new_map) { + /* Backend is only enabled if there were any enabled counters */ + if (cur_map_any_enabled) + hctx->iface->dump_disable(accum->backend); + + /* (Re-)enable the backend if the new map has enabled counters. + * No need to acquire the spinlock, as concurrent enable while + * we're already enabled and holding accum_lock is impossible. + */ + if (new_map_any_enabled) { + errcode = hctx->iface->dump_enable( + accum->backend, new_map); + if (errcode) + goto error; + } + } + + /* Copy, accumulate, or zero into the dest buffer to finish */ + if (dump_buf) { + /* If we dumped, copy or accumulate it into the destination */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_get( + accum->backend, + dump_buf, + cur_map, + dump_written); + if (errcode) + goto error; + dump_written = true; + } + + /* If we've not written anything into the dump buffer so far, it + * means there was nothing to write. Zero any enabled counters. + */ + if (!dump_written) + kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); + } + + /* Write out timestamps */ + *ts_start_ns = accum->ts_last_dump_ns; + *ts_end_ns = dump_time_ns; + + accum->accumulated = false; + accum->ts_last_dump_ns = dump_time_ns; + + return 0; +error: + /* An error was only physically possible if the backend was enabled */ + WARN_ON(state != ACCUM_STATE_ENABLED); + + /* Disable the backend, and transition to the error state */ + hctx->iface->dump_disable(accum->backend); + spin_lock_irqsave(&hctx->state_lock, flags); + + accum->state = ACCUM_STATE_ERROR; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return errcode; +} + +/** + * kbasep_hwcnt_context_disable() - Increment the disable count of the context. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. 
+ */ +static void kbasep_hwcnt_context_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + unsigned long flags; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + + if (!kbase_hwcnt_context_disable_atomic(hctx)) { + kbasep_hwcnt_accumulator_disable(hctx, accumulate); + + spin_lock_irqsave(&hctx->state_lock, flags); + + /* Atomic disable failed and we're holding the mutex, so current + * disable count must be 0. + */ + WARN_ON(hctx->disable_count != 0); + hctx->disable_count++; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + } +} + +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum) +{ + int errcode = 0; + unsigned long flags; + + if (!hctx || !accum) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!hctx->accum_inited) + /* Set accum initing now to prevent concurrent init */ + hctx->accum_inited = true; + else + /* Already have an accum, or already being inited */ + errcode = -EBUSY; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + + if (errcode) + return errcode; + + errcode = kbasep_hwcnt_accumulator_init(hctx); + + if (errcode) { + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum_inited = false; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + + return errcode; + } + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count == 0); + WARN_ON(hctx->accum.enable_map_any_enabled); + + /* Decrement the disable count to allow the accumulator to be accessible + * now that it's fully constructed. + */ + hctx->disable_count--; + + /* + * Make sure the accumulator is initialised to the correct state. + * Regardless of initial state, counters don't need to be enabled via + * the backend, as the initial enable map has no enabled counters. + */ + hctx->accum.state = (hctx->disable_count == 0) ? + ACCUM_STATE_ENABLED : + ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + *accum = &hctx->accum; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire); + +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) +{ + unsigned long flags; + struct kbase_hwcnt_context *hctx; + + if (!accum) + return; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + mutex_lock(&hctx->accum_lock); + + /* Double release is a programming error */ + WARN_ON(!hctx->accum_inited); + + /* Disable the context to ensure the accumulator is inaccesible while + * we're destroying it. This performs the corresponding disable count + * increment to the decrement done during acquisition. + */ + kbasep_hwcnt_context_disable(hctx, false); + + mutex_unlock(&hctx->accum_lock); + + kbasep_hwcnt_accumulator_term(hctx); + + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum_inited = false; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release); + +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) +{ + if (WARN_ON(!hctx)) + return; + + /* Try and atomically disable first, so we can avoid locking the mutex + * if we don't need to. 
+ */ + if (kbase_hwcnt_context_disable_atomic(hctx)) + return; + + mutex_lock(&hctx->accum_lock); + + kbasep_hwcnt_context_disable(hctx, true); + + mutex_unlock(&hctx->accum_lock); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable); + +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + bool atomic_disabled = false; + + if (WARN_ON(!hctx)) + return false; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { + /* + * If disable count is non-zero or no counters are enabled, we + * can just bump the disable count. + * + * Otherwise, we can't disable in an atomic context. + */ + if (hctx->disable_count != 0) { + hctx->disable_count++; + atomic_disabled = true; + } else { + WARN_ON(!hctx->accum_inited); + if (!hctx->accum.enable_map_any_enabled) { + hctx->disable_count++; + hctx->accum.state = ACCUM_STATE_DISABLED; + atomic_disabled = true; + } + } + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return atomic_disabled; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic); + +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + + if (WARN_ON(!hctx)) + return; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == 0)) { + if (hctx->disable_count == 1) + kbasep_hwcnt_accumulator_enable(hctx); + + hctx->disable_count--; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable); + +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return NULL; + + return hctx->iface->metadata; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata); + +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !new_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if ((new_map->metadata != hctx->iface->metadata) || + (dump_buf && (dump_buf->metadata != hctx->iface->metadata))) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump( + hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters); + +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if (dump_buf && (dump_buf->metadata != hctx->iface->metadata)) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump( + hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h new file mode 100644 index 000000000000..fc45743e264c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h @@ -0,0 +1,137 @@ +/* + * + * (C) COPYRIGHT 2018 ARM 
Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter accumulator API. + */ + +#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ +#define _KBASE_HWCNT_ACCUMULATOR_H_ + +#include + +struct kbase_hwcnt_context; +struct kbase_hwcnt_accumulator; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator + * for a hardware counter context. + * @hctx: Non-NULL pointer to a hardware counter context. + * @accum: Non-NULL pointer to where the pointer to the created accumulator + * will be stored on success. + * + * There can exist at most one instance of the hardware counter accumulator per + * context at a time. + * + * If multiple clients need access to the hardware counters at the same time, + * then an abstraction built on top of the single instance to the hardware + * counter accumulator is required. + * + * No counters will be enabled with the returned accumulator. A subsequent call + * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. + * + * There are four components to a hardware counter dump: + * - A set of enabled counters + * - A start time + * - An end time + * - A dump buffer containing the accumulated counter values for all enabled + * counters between the start and end times. + * + * For each dump, it is guaranteed that all enabled counters were active for the + * entirety of the period between the start and end times. + * + * It is also guaranteed that the start time of dump "n" is always equal to the + * end time of dump "n - 1". + * + * For all dumps, the values of any counters that were not enabled is undefined. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum); + +/** + * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * The accumulator must be released before the context the accumulator was + * created from is terminated. + */ +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); + +/** + * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently + * enabled counters, and enable a new + * set of counters that will be used + * for subsequent dumps. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @new_map: Non-NULL pointer to the new counter enable map. Must have the + * same metadata as the accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. 
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled + * counters. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h new file mode 100644 index 000000000000..b7aa0e1fa8e9 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h @@ -0,0 +1,217 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Virtual interface for hardware counter backends. + */ + +#ifndef _KBASE_HWCNT_BACKEND_H_ +#define _KBASE_HWCNT_BACKEND_H_ + +#include + +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to information used to + * create an instance of a hardware counter + * backend. + */ +struct kbase_hwcnt_backend_info; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter + * backend, used to perform dumps. + */ +struct kbase_hwcnt_backend; + +/** + * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. 
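+ *
+ * Callers are normally expected to reach this through
+ * struct kbase_hwcnt_backend_interface rather than by naming a concrete
+ * implementation, e.g. (illustrative sketch; local names are not from this
+ * file):
+ *
+ *   struct kbase_hwcnt_backend *backend;
+ *   int errcode = iface->init(iface->info, &backend);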
+ * + * All uses of the created hardware counter backend must be externally + * synchronised. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_init_fn)( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); + +/** + * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. + * @backend: Pointer to backend to be terminated. + */ +typedef void (*kbase_hwcnt_backend_term_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend + * timestamp. + * @backend: Non-NULL pointer to backend. + * + * Return: Backend timestamp in nanoseconds. + */ +typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the + * backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * The enable_map must have been created using the interface's metadata. + * If the backend has already been enabled, an error is returned. + * + * May be called in an atomic context. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping + * with the backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be + * called in an atomic context with the spinlock documented by the specific + * backend interface held. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with + * the backend. + * @backend: Non-NULL pointer to backend. + * + * If the backend is already disabled, does nothing. + * Any undumped counter values since the last dump get will be lost. + */ +typedef void (*kbase_hwcnt_backend_dump_disable_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped + * counters. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_clear_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter + * dump. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled or another dump is already in progress, + * returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_request_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested + * counter dump has completed. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. 
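+ *
+ * Taken together with dump_request and dump_get, a user of this interface
+ * is expected to drive a dump roughly as follows (illustrative sketch,
+ * error handling omitted; local names are placeholders):
+ *
+ *   iface->dump_enable(backend, enable_map);
+ *   iface->dump_request(backend);
+ *   iface->dump_wait(backend);
+ *   iface->dump_get(backend, dump_buf, enable_map, false);
+ *   iface->dump_disable(backend);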
+ */ +typedef int (*kbase_hwcnt_backend_dump_wait_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the + * counters dumped after the last dump + * request into the dump buffer. + * @backend: Non-NULL pointer to backend. + * @dump_buffer: Non-NULL pointer to destination dump buffer. + * @enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters should be accumulated into dump_buffer, rather + * than copied. + * + * If the backend is not enabled, returns an error. + * If a dump is in progress (i.e. dump_wait has not yet returned successfully) + * then the resultant contents of the dump buffer will be undefined. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_get_fn)( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, + bool accumulate); + +/** + * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual + * interface. + * @metadata: Immutable hardware counter metadata. + * @info: Immutable info used to initialise an instance of the + * backend. + * @init: Function ptr to initialise an instance of the backend. + * @term: Function ptr to terminate an instance of the backend. + * @timestamp_ns: Function ptr to get the current backend timestamp. + * @dump_enable: Function ptr to enable dumping. + * @dump_enable_nolock: Function ptr to enable dumping while the + * backend-specific spinlock is already held. + * @dump_disable: Function ptr to disable dumping. + * @dump_clear: Function ptr to clear counters. + * @dump_request: Function ptr to request a dump. + * @dump_wait: Function ptr to wait until dump to complete. + * @dump_get: Function ptr to copy or accumulate dump into a dump + * buffer. + */ +struct kbase_hwcnt_backend_interface { + const struct kbase_hwcnt_metadata *metadata; + const struct kbase_hwcnt_backend_info *info; + kbase_hwcnt_backend_init_fn init; + kbase_hwcnt_backend_term_fn term; + kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; + kbase_hwcnt_backend_dump_enable_fn dump_enable; + kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; + kbase_hwcnt_backend_dump_disable_fn dump_disable; + kbase_hwcnt_backend_dump_clear_fn dump_clear; + kbase_hwcnt_backend_dump_request_fn dump_request; + kbase_hwcnt_backend_dump_wait_fn dump_wait; + kbase_hwcnt_backend_dump_get_fn dump_get; +}; + +#endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c new file mode 100644 index 000000000000..4bc8916922b9 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c @@ -0,0 +1,538 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_backend_gpu.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#include "mali_kbase_pm_policy.h" +#include "mali_kbase_hwaccess_instr.h" +#include "mali_kbase_tlstream.h" +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +/** + * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance + * of a GPU hardware counter backend. + * @kbdev: KBase device. + * @use_secondary: True if secondary performance counters should be used, + * else false. Ignored if secondary counters are not supported. + * @metadata: Hardware counter metadata. + * @dump_bytes: Bytes of GPU memory required to perform a + * hardware counter dump. + */ +struct kbase_hwcnt_backend_gpu_info { + struct kbase_device *kbdev; + bool use_secondary; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; +}; + +/** + * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend. + * @info: Info used to create the backend. + * @kctx: KBase context used for GPU memory allocation and + * counter dumping. + * @kctx_element: List element used to add kctx to device context list. + * @gpu_dump_va: GPU hardware counter dump buffer virtual address. + * @cpu_dump_va: CPU mapping of gpu_dump_va. + * @vmap: Dump buffer vmap. + * @enabled: True if dumping has been enabled, else false. + */ +struct kbase_hwcnt_backend_gpu { + const struct kbase_hwcnt_backend_gpu_info *info; + struct kbase_context *kctx; + struct kbasep_kctx_list_element *kctx_element; + u64 gpu_dump_va; + void *cpu_dump_va; + struct kbase_vmap_struct *vmap; + bool enabled; +}; + +/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( + struct kbase_hwcnt_backend *backend) +{ + struct timespec ts; + + (void)backend; + getrawmonotonic(&ts); + return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_hwcnt_physical_enable_map phys; + struct kbase_instr_hwcnt_enable enable; + + if (!backend_gpu || !enable_map || backend_gpu->enabled || + (enable_map->metadata != backend_gpu->info->metadata)) + return -EINVAL; + + kctx = backend_gpu->kctx; + kbdev = backend_gpu->kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); + + enable.jm_bm = phys.jm_bm; + enable.shader_bm = phys.shader_bm; + enable.tiler_bm = phys.tiler_bm; + enable.mmu_l2_bm = phys.mmu_l2_bm; + enable.use_secondary = backend_gpu->info->use_secondary; + enable.dump_buffer = backend_gpu->gpu_dump_va; + enable.dump_buffer_bytes = backend_gpu->info->dump_bytes; + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_gpu->enabled = true; + + return 0; +error: + return errcode; +} + +/* GPU backend implementation of 
kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_gpu_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int errcode; + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + struct kbase_device *kbdev; + + if (!backend_gpu) + return -EINVAL; + + kbdev = backend_gpu->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock( + backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_gpu_dump_disable( + struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (WARN_ON(!backend_gpu) || !backend_gpu->enabled) + return; + + errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx); + WARN_ON(errcode); + + backend_gpu->enabled = false; +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_gpu_dump_clear( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !backend_gpu->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_gpu->kctx); +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_gpu_dump_request( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !backend_gpu->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_request_dump(backend_gpu->kctx); +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_gpu_dump_wait( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !backend_gpu->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx); +} + +/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_gpu_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_gpu *backend_gpu = + (struct kbase_hwcnt_backend_gpu *)backend; + + if (!backend_gpu || !dst || !dst_enable_map || + (backend_gpu->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions( + backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); + + return kbase_hwcnt_gpu_dump_get( + dst, backend_gpu->cpu_dump_va, dst_enable_map, accumulate); +} + +/** + * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer. + * @info: Non-NULL pointer to GPU backend info. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. 
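+ *
+ * Sizing note (illustrative, assuming 4-byte counter values): for a V5 GPU
+ * with 2 L2 slices and core mask 0b1111, info->dump_bytes is
+ * (2 + 2 + 4) blocks * 64 values * 4 bytes = 2048 bytes, which PFN_UP()
+ * below rounds up to a single page for the GPU allocation.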
+ */ +static int kbasep_hwcnt_backend_gpu_dump_alloc( + const struct kbase_hwcnt_backend_gpu_info *info, + struct kbase_context *kctx, + u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | + BASE_MEM_PROT_GPU_WR | + BASE_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU; + + if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) + flags |= BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_gpu_dump_free( + struct kbase_context *kctx, + u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend. + * @backend: Pointer to GPU backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. + */ +static void kbasep_hwcnt_backend_gpu_destroy( + struct kbase_hwcnt_backend_gpu *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_gpu_dump_free( + kctx, backend->gpu_dump_va); + + if (backend->kctx_element) { + mutex_lock(&kbdev->kctx_list_lock); + + KBASE_TLSTREAM_TL_DEL_CTX(kctx); + list_del(&backend->kctx_element->link); + + mutex_unlock(&kbdev->kctx_list_lock); + kfree(backend->kctx_element); + } + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_create( + const struct kbase_hwcnt_backend_gpu_info *info, + struct kbase_hwcnt_backend_gpu **out_backend) +{ + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_gpu *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + + backend->kctx = kbase_create_context(kbdev, true); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + backend->kctx_element = kzalloc( + sizeof(*backend->kctx_element), GFP_KERNEL); + if (!backend->kctx_element) + goto alloc_error; + + backend->kctx_element->kctx = backend->kctx; + + /* Add kernel context to list of contexts associated with device. 
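+ * The matching teardown in kbasep_hwcnt_backend_gpu_destroy() above removes
+ * the element under the same lock, in sketch form:
+ *
+ *   mutex_lock(&kbdev->kctx_list_lock);
+ *   KBASE_TLSTREAM_TL_DEL_CTX(kctx);
+ *   list_del(&backend->kctx_element->link);
+ *   mutex_unlock(&kbdev->kctx_list_lock);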
*/ + mutex_lock(&kbdev->kctx_list_lock); + + list_add(&backend->kctx_element->link, &kbdev->kctx_list); + /* Fire tracepoint while lock is held, to ensure tracepoint is not + * created in both body and summary stream + */ + KBASE_TLSTREAM_TL_NEW_CTX( + backend->kctx, backend->kctx->id, (u32)(backend->kctx->tgid)); + + mutex_unlock(&kbdev->kctx_list_lock); + + errcode = kbasep_hwcnt_backend_gpu_dump_alloc( + info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, + backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va) + goto alloc_error; + +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); +#endif + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_gpu_destroy(backend); + return errcode; +} + +/* GPU backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_gpu_init( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_gpu *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_gpu_create( + (const struct kbase_hwcnt_backend_gpu_info *) info, &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* GPU backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend) +{ + if (!backend) + return; + + kbasep_hwcnt_backend_gpu_dump_disable(backend); + kbasep_hwcnt_backend_gpu_destroy( + (struct kbase_hwcnt_backend_gpu *)backend); +} + +/** + * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. + */ +static void kbasep_hwcnt_backend_gpu_info_destroy( + const struct kbase_hwcnt_backend_gpu_info *info) +{ + if (!info) + return; + + kbase_hwcnt_gpu_metadata_destroy(info->metadata); + kfree(info); +} + +/** + * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info. + * @kbdev: Non_NULL pointer to kbase device. + * @out_info: Non-NULL pointer to where info is stored on success. + * + * Return 0 on success, else error code. 
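+ *
+ * On success the info bundles everything later needed to build backends,
+ * roughly:
+ *
+ *   info->kbdev          device the backend's dumping kctx is created on
+ *   info->use_secondary  from CONFIG_MALI_PRFCNT_SET_SECONDARY
+ *   info->metadata       from kbase_hwcnt_gpu_metadata_create()
+ *   info->dump_bytes     raw GPU dump buffer size for that metadata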
+ */ +static int kbasep_hwcnt_backend_gpu_info_create( + struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_gpu_info **out_info) +{ + int errcode = -ENOMEM; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; + struct kbase_hwcnt_backend_gpu_info *info = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_info); + + errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + if (errcode) + return errcode; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto error; + + info->kbdev = kbdev; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + info->use_secondary = true; +#else + info->use_secondary = false; +#endif + + errcode = kbase_hwcnt_gpu_metadata_create( + &hwcnt_gpu_info, info->use_secondary, + &info->metadata, + &info->dump_bytes); + if (errcode) + goto error; + + *out_info = info; + + return 0; +error: + kbasep_hwcnt_backend_gpu_info_destroy(info); + return errcode; +} + +int kbase_hwcnt_backend_gpu_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_gpu_info *info = NULL; + + if (!kbdev || !iface) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info); + + if (errcode) + return errcode; + + iface->metadata = info->metadata; + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->init = kbasep_hwcnt_backend_gpu_init; + iface->term = kbasep_hwcnt_backend_gpu_term; + iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock; + iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request; + iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait; + iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_gpu_destroy( + struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_gpu_info_destroy( + (const struct kbase_hwcnt_backend_gpu_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h new file mode 100644 index 000000000000..7712f1424a8b --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h @@ -0,0 +1,61 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU + * backend. 
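+ *
+ * A typical use (illustrative sketch, error handling omitted; iface and
+ * hctx are placeholder locals) is to create the interface once per device
+ * and hand it to the hardware counter context layer:
+ *
+ *   struct kbase_hwcnt_backend_interface iface;
+ *   struct kbase_hwcnt_context *hctx;
+ *
+ *   kbase_hwcnt_backend_gpu_create(kbdev, &iface);
+ *   kbase_hwcnt_context_init(&iface, &hctx);
+ *   ...
+ *   kbase_hwcnt_context_term(hctx);
+ *   kbase_hwcnt_backend_gpu_destroy(&iface);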
+ */ + +#ifndef _KBASE_HWCNT_BACKEND_GPU_H_ +#define _KBASE_HWCNT_BACKEND_GPU_H_ + +#include "mali_kbase_hwcnt_backend.h" + +struct kbase_device; + +/** + * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend + * interface. + * @kbdev: Non-NULL pointer to kbase device. + * @iface: Non-NULL pointer to backend interface structure that is filled in + * on creation success. + * + * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_gpu_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +void kbase_hwcnt_backend_gpu_destroy( + struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h new file mode 100644 index 000000000000..bc50ad12c2f4 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h @@ -0,0 +1,119 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter context API. + */ + +#ifndef _KBASE_HWCNT_CONTEXT_H_ +#define _KBASE_HWCNT_CONTEXT_H_ + +#include + +struct kbase_hwcnt_backend_interface; +struct kbase_hwcnt_context; + +/** + * kbase_hwcnt_context_init() - Initialise a hardware counter context. + * @iface: Non-NULL pointer to a hardware counter backend interface. + * @out_hctx: Non-NULL pointer to where the pointer to the created context will + * be stored on success. + * + * On creation, the disable count of the context will be 0. + * A hardware counter accumulator can be acquired using a created context. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx); + +/** + * kbase_hwcnt_context_term() - Terminate a hardware counter context. + * @hctx: Pointer to context to be terminated. + */ +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by + * the context, so related counter data + * structures can be created. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable() - Increment the disable count of the context. 
+ * @hctx: Pointer to the hardware counter context. + * + * If a call to this function increments the disable count from 0 to 1, and + * an accumulator has been acquired, then a counter dump will be performed + * before counters are disabled via the backend interface. + * + * Subsequent dumps via the accumulator while counters are disabled will first + * return the accumulated dump, then will return dumps with zeroed counters. + * + * After this function call returns, it is guaranteed that counters will not be + * enabled via the backend interface. + */ +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the + * context if possible in an atomic + * context. + * @hctx: Pointer to the hardware counter context. + * + * This function will only succeed if hardware counters are effectively already + * disabled, i.e. there is no accumulator, the disable count is already + * non-zero, or the accumulator has no counters set. + * + * After this function call returns true, it is guaranteed that counters will + * not be enabled via the backend interface. + * + * Return: True if the disable count was incremented, else False. + */ +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_enable() - Decrement the disable count of the context. + * @hctx: Pointer to the hardware counter context. + * + * If a call to this function decrements the disable count from 1 to 0, and + * an accumulator has been acquired, then counters will be re-enabled via the + * backend interface. + * + * If an accumulator has been acquired and enabling counters fails for some + * reason, the accumulator will be placed into an error state. + * + * It is only valid to call this function one time for each prior returned call + * to kbase_hwcnt_context_disable. + * + * The spinlock documented in the backend interface that was passed in to + * kbase_hwcnt_context_init() must be held before calling this function. + */ +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + +#endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c new file mode 100644 index 000000000000..647d3ecdf100 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c @@ -0,0 +1,716 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 +#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 +#define KBASE_HWCNT_V4_MAX_GROUPS \ + (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) +#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V4 counter block */ +#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 + +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V5 counter block */ +#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 + +/** + * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter + * metadata for a v4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * @metadata: Non-NULL pointer to where created metadata is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_v4_create( + const struct kbase_hwcnt_gpu_v4_info *v4_info, + const struct kbase_hwcnt_metadata **metadata) +{ + size_t grp; + int errcode = -ENOMEM; + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description *grps; + size_t avail_mask_bit; + + WARN_ON(!v4_info); + WARN_ON(!metadata); + + /* Check if there are enough bits in the availability mask to represent + * all the hardware counter blocks in the system. + */ + if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) + return -EINVAL; + + grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); + if (!grps) + goto clean_up; + + desc.grp_cnt = v4_info->cg_count; + desc.grps = grps; + + for (grp = 0; grp < v4_info->cg_count; grp++) { + size_t blk; + size_t sc; + const u64 core_mask = v4_info->cgs[grp].core_mask; + struct kbase_hwcnt_block_description *blks = kcalloc( + KBASE_HWCNT_V4_BLOCKS_PER_GROUP, + sizeof(*blks), + GFP_KERNEL); + + if (!blks) + goto clean_up; + + grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; + grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; + grps[grp].blks = blks; + + for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { + blks[blk].inst_cnt = 1; + blks[blk].hdr_cnt = + KBASE_HWCNT_V4_HEADERS_PER_BLOCK; + blks[blk].ctr_cnt = + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; + } + + for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { + blks[sc].type = core_mask & (1ull << sc) ? + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + } + + blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; + blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; + blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + blks[7].type = (grp == 0) ? 
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + + WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); + } + + /* Initialise the availability mask */ + desc.avail_mask = 0; + avail_mask_bit = 0; + + for (grp = 0; grp < desc.grp_cnt; grp++) { + size_t blk; + const struct kbase_hwcnt_block_description *blks = + desc.grps[grp].blks; + for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { + WARN_ON(blks[blk].inst_cnt != 1); + if (blks[blk].type != + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) + desc.avail_mask |= (1ull << avail_mask_bit); + + avail_mask_bit++; + } + } + + errcode = kbase_hwcnt_metadata_create(&desc, metadata); + + /* Always clean up, as metadata will make a copy of the input args */ +clean_up: + if (grps) { + for (grp = 0; grp < v4_info->cg_count; grp++) + kfree(grps[grp].blks); + kfree(grps); + } + return errcode; +} + +/** + * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a + * V4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * + * Return: Size of buffer the V4 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( + const struct kbase_hwcnt_gpu_v4_info *v4_info) +{ + return v4_info->cg_count * + KBASE_HWCNT_V4_BLOCKS_PER_GROUP * + KBASE_HWCNT_V4_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +/** + * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter + * metadata for a v5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @metadata: Non-NULL pointer to where created metadata is stored + * on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + const struct kbase_hwcnt_gpu_v5_info *v5_info, + bool use_secondary, + const struct kbase_hwcnt_metadata **metadata) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description + blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t non_sc_block_count; + size_t sc_block_count; + + WARN_ON(!v5_info); + WARN_ON(!metadata); + + /* Calculate number of block instances that aren't shader cores */ + non_sc_block_count = 2 + v5_info->l2_count; + /* Calculate number of block instances that are shader cores */ + sc_block_count = fls64(v5_info->core_mask); + + /* + * A system can have up to 64 shader cores, but the 64-bit + * availability mask can't physically represent that many cores as well + * as the other hardware blocks. + * Error out if there are more blocks than our implementation can + * support. + */ + if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + + /* One Job Manager block */ + blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; + blks[0].inst_cnt = 1; + blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* One Tiler block */ + blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + blks[1].inst_cnt = 1; + blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* l2_count memsys blks */ + blks[2].type = use_secondary ? 
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; + blks[2].inst_cnt = v5_info->l2_count; + blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* + * There are as many shader cores in the system as there are bits set in + * the core mask. However, the dump buffer memory requirements need to + * take into account the fact that the core mask may be non-contiguous. + * + * For example, a system with a core mask of 0b1011 has the same dump + * buffer memory requirements as a system with 0b1111, but requires more + * memory than a system with 0b0111. However, core 2 of the system with + * 0b1011 doesn't physically exist, and the dump buffer memory that + * accounts for that core will never be written to when we do a counter + * dump. + * + * We find the core mask's last set bit to determine the memory + * requirements, and embed the core mask into the availability mask so + * we can determine later which shader cores physically exist. + */ + blks[3].type = use_secondary ? + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + blks[3].inst_cnt = sc_block_count; + blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + + group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; + group.blks = blks; + + desc.grp_cnt = 1; + desc.grps = &group; + + /* The JM, Tiler, and L2s are always available, and are before cores */ + desc.avail_mask = (1ull << non_sc_block_count) - 1; + /* Embed the core mask directly in the availability mask */ + desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); + + return kbase_hwcnt_metadata_create(&desc, metadata); +} + +/** + * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a + * V5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * + * Return: Size of buffer the V5 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes( + const struct kbase_hwcnt_gpu_v5_info *v5_info) +{ + WARN_ON(!v5_info); + return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) * + KBASE_HWCNT_V5_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +int kbase_hwcnt_gpu_info_init( + struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) +{ + if (!kbdev || !info) + return -EINVAL; + +#ifdef CONFIG_MALI_NO_MALI + /* NO_MALI uses V5 layout, regardless of the underlying platform. 
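+ * As a worked example of the V5 layout built by
+ * kbasep_hwcnt_backend_gpu_metadata_v5_create() above (illustrative): with
+ * l2_count = 2 and a sparse core_mask of 0b1011, non_sc_block_count = 4 and
+ * sc_block_count = fls64(0b1011) = 4, giving
+ * avail_mask = 0b1111 | (0b1011 << 4) = 0b10111111; shader core 2 still has
+ * a block instance in the dump layout, but its availability bit is 0.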
*/ + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; +#else + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; + info->v4.cg_count = kbdev->gpu_props.num_core_groups; + info->v4.cgs = kbdev->gpu_props.props.coherency_info.group; + } else { + const struct base_gpu_props *props = &kbdev->gpu_props.props; + const size_t l2_count = props->l2_props.num_l2_slices; + const size_t core_mask = + props->coherency_info.group[0].core_mask; + + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + info->v5.l2_count = l2_count; + info->v5.core_mask = core_mask; + } +#endif + return 0; +} + +int kbase_hwcnt_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + bool use_secondary, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes) +{ + int errcode; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; + + if (!info || !out_metadata || !out_dump_bytes) + return -EINVAL; + + switch (info->type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4); + errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create( + &info->v4, &metadata); + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5); + errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create( + &info->v5, use_secondary, &metadata); + break; + default: + return -EINVAL; + } + if (errcode) + return errcode; + + /* + * Dump abstraction size should be exactly the same size and layout as + * the physical dump size, for backwards compatibility. + */ + WARN_ON(dump_bytes != metadata->dump_buf_bytes); + + *out_metadata = metadata; + *out_dump_bytes = dump_bytes; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); + +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata) +{ + if (!metadata) + return; + + kbase_hwcnt_metadata_destroy(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); + +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + const struct kbase_hwcnt_metadata *metadata; + const u32 *dump_src; + size_t src_offset, grp, blk, blk_inst; + + if (!dst || !src || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + metadata = dst->metadata; + dump_src = (const u32 *)src; + src_offset = 0; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const size_t hdr_cnt = + kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + const size_t ctr_cnt = + kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + + /* Early out if no values in the dest block are enabled */ + if (kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = dump_src + src_offset; + + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + } + } + + src_offset += (hdr_cnt + ctr_cnt); + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); + +/** + * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block + * enable map 
abstraction to + * a physical block enable + * map. + * @lo: Low 64 bits of block enable map abstraction. + * @hi: High 64 bits of block enable map abstraction. + * + * The abstraction uses 128 bits to enable 128 block values, whereas the + * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. + * Therefore, this conversion is lossy. + * + * Return: 32-bit physical block enable map. + */ +static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical( + u64 lo, + u64 hi) +{ + u32 phys = 0; + u64 dwords[2] = {lo, hi}; + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u64 dword = dwords[dword_idx]; + u16 packed = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u16 mask = + ((dword >> (dword_bit + 0)) & 0x1) | + ((dword >> (dword_bit + 1)) & 0x1) | + ((dword >> (dword_bit + 2)) & 0x1) | + ((dword >> (dword_bit + 3)) & 0x1); + packed |= (mask << hword_bit); + } + phys |= ((u32)packed) << (16 * dword_idx); + } + return phys; +} + +/** + * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical + * block enable map to a + * block enable map + * abstraction. + * @phys: Physical 32-bit block enable map + * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction + * will be stored. + * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction + * will be stored. + */ +static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( + u32 phys, + u64 *lo, + u64 *hi) +{ + u64 dwords[2] = {0, 0}; + + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u16 packed = phys >> (16 * dword_idx); + u64 dword = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u64 mask = (packed >> (hword_bit)) & 0x1; + + dword |= mask << (dword_bit + 0); + dword |= mask << (dword_bit + 1); + dword |= mask << (dword_bit + 2); + dword |= mask << (dword_bit + 3); + } + dwords[dword_idx] = dword; + } + *lo = dwords[0]; + *hi = dwords[1]; +} + +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 jm_bm = 0; + u64 shader_bm = 0; + u64 tiler_bm = 0; + u64 mmu_l2_bm = 0; + + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = src->metadata; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + src, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + mmu_l2_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + jm_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case 
KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + jm_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm |= *blk_map; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } + + dst->jm_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); + dst->shader_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); + dst->tiler_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0); + dst->mmu_l2_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); + +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 ignored_hi; + u64 jm_bm; + u64 shader_bm; + u64 tiler_bm; + u64 mmu_l2_bm; + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = dst->metadata; + + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->jm_bm, &jm_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->shader_bm, &shader_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->tiler_bm, &tiler_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi); + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + *blk_map = mmu_l2_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + *blk_map = jm_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + *blk_map = jm_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + *blk_map = mmu_l2_bm; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); + +void 
kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!buf) || WARN_ON(!enable_map) || + WARN_ON(buf->metadata != enable_map->metadata)) + return; + + metadata = buf->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = + kbase_hwcnt_metadata_group_type(metadata, grp); + u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( + buf, grp, blk, blk_inst); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + const u32 prfcnt_en = + kbasep_hwcnt_backend_gpu_block_map_to_physical( + blk_map[0], 0); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h new file mode 100644 index 000000000000..509608a3d9b1 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h @@ -0,0 +1,249 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_HWCNT_GPU_H_ +#define _KBASE_HWCNT_GPU_H_ + +#include + +struct kbase_device; +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to + * identify metadata groups. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. + */ +enum kbase_hwcnt_gpu_group_type { + KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10, + KBASE_HWCNT_GPU_GROUP_TYPE_V5, +}; + +/** + * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types, + * used to identify metadata blocks. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block. + */ +enum kbase_hwcnt_gpu_v4_block_type { + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED, +}; + +/** + * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, + * used to identify metadata blocks. 
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. + */ +enum kbase_hwcnt_gpu_v5_block_type { + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, +}; + +/** + * struct kbase_hwcnt_physical_enable_map - Representation of enable map + * directly used by GPU. + * @jm_bm: Job Manager counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + */ +struct kbase_hwcnt_physical_enable_map { + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; +}; + +/** + * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. + * @cg_count: Core group count. + * @cgs: Non-NULL pointer to array of cg_count coherent group structures. + * + * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, + * where each core group may have a physically different layout. + */ +struct kbase_hwcnt_gpu_v4_info { + size_t cg_count; + const struct mali_base_gpu_coherent_group *cgs; +}; + +/** + * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. + * @l2_count: L2 cache count. + * @core_mask: Shader core mask. May be sparse. + */ +struct kbase_hwcnt_gpu_v5_info { + size_t l2_count; + u64 core_mask; +}; + +/** + * struct kbase_hwcnt_gpu_info - Tagged union with information about the current + * GPU's hwcnt blocks. + * @type: GPU type. + * @v4: Info filled in if a v4 GPU. + * @v5: Info filled in if a v5 GPU. + */ +struct kbase_hwcnt_gpu_info { + enum kbase_hwcnt_gpu_group_type type; + union { + struct kbase_hwcnt_gpu_v4_info v4; + struct kbase_hwcnt_gpu_v5_info v5; + }; +}; + +/** + * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the + * hwcnt metadata. + * @kbdev: Non-NULL pointer to kbase device. + * @info: Non-NULL pointer to data structure to be filled in. + * + * The initialised info struct will only be valid for use while kbdev is valid. + */ +int kbase_hwcnt_gpu_info_init( + struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info); + +/** + * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the + * current GPU. + * @info: Non-NULL pointer to info struct initialised by + * kbase_hwcnt_gpu_info_init. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump + * buffer is stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + bool use_secondary, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes); + +/** + * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata. + * @metadata: Pointer to metadata to destroy. 
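 *
 * A typical create/destroy pairing, as a rough sketch (error handling
 * elided, and assuming a valid kbdev):
 *
 *   struct kbase_hwcnt_gpu_info info;
 *   const struct kbase_hwcnt_metadata *md;
 *   size_t dump_bytes;
 *
 *   kbase_hwcnt_gpu_info_init(kbdev, &info);
 *   kbase_hwcnt_gpu_metadata_create(&info, false, &md, &dump_bytes);
 *   ...
 *   kbase_hwcnt_gpu_metadata_destroy(md);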
+ */ +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src raw dump buffer, of same length + * as returned in out_dump_bytes parameter of + * kbase_hwcnt_gpu_metadata_create. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters in src should be accumulated into dst, + * rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as + * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate); + +/** + * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction + * into a physical enable map. + * @dst: Non-NULL pointer to dst physical enable map. + * @src: Non-NULL pointer to src enable map abstraction. + * + * The src must have been created from a metadata returned from a call to + * kbase_hwcnt_gpu_metadata_create. + * + * This is a lossy conversion, as the enable map abstraction has one bit per + * individual counter block value, but the physical enable map uses 1 bit for + * every 4 counters, shared over all instances of a block. + */ +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src); + +/** + * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to + * an enable map abstraction. + * @dst: Non-NULL pointer to dst enable map abstraction. + * @src: Non-NULL pointer to src physical enable map. + * + * The dst must have been created from a metadata returned from a call to + * kbase_hwcnt_gpu_metadata_create. + * + * This is a lossy conversion, as the physical enable map can technically + * support counter blocks with 128 counters each, but no hardware actually uses + * more than 64, so the enable map abstraction has nowhere to store the enable + * information for the 64 non-existent counters. + */ +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src); + +/** + * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter + * enable headers in a dump buffer to + * reflect the specified enable map. + * @buf: Non-NULL pointer to dump buffer to patch. + * @enable_map: Non-NULL pointer to enable map. + * + * The buf and enable_map must have been created from a metadata returned from + * a call to kbase_hwcnt_gpu_metadata_create. + * + * This function should be used before handing off a dump buffer over the + * kernel-user boundary, to ensure the header is accurate for the enable map + * used by the user. 
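 *
 * As a sketch of the intended ordering in a dump path (mirroring the legacy
 * client), assuming buf and map were created from the same metadata:
 *
 *   kbase_hwcnt_gpu_patch_dump_headers(buf, map);
 *   kbase_hwcnt_dump_buffer_zero_non_enabled(buf, map);
 *
 * after which buf->dump_buf can be copied out to userspace.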
+ */ +void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map); + +#endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c new file mode 100644 index 000000000000..b0e6aee1b135 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c @@ -0,0 +1,152 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" + +#include +#include + +/** + * struct kbase_hwcnt_legacy_client - Legacy hardware counter client. + * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned. + * @enable_map: Counter enable map. + * @dump_buf: Dump buffer used to manipulate dumps before copied to user. + * @hvcli: Hardware counter virtualizer client. + */ +struct kbase_hwcnt_legacy_client { + void __user *user_dump_buf; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer dump_buf; + struct kbase_hwcnt_virtualizer_client *hvcli; +}; + +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli) +{ + int errcode; + struct kbase_hwcnt_legacy_client *hlcli; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_physical_enable_map phys_em; + + if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + + hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL); + if (!hlcli) + return -ENOMEM; + + hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map); + if (errcode) + goto error; + + /* Translate from the ioctl enable map to the internal one */ + phys_em.jm_bm = enable->jm_bm; + phys_em.shader_bm = enable->shader_bm; + phys_em.tiler_bm = enable->tiler_bm; + phys_em.mmu_l2_bm = enable->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em); + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &hlcli->enable_map, &hlcli->hvcli); + if (errcode) + goto error; + + *out_hlcli = hlcli; + return 0; + +error: + kbase_hwcnt_legacy_client_destroy(hlcli); + return errcode; +} + +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli) +{ + if (!hlcli) + return; + + kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli); + 
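	/* The free helpers below are documented as safe on all-zeroed
	 * structures, so a client that failed part-way through creation
	 * (hlcli is kzalloc'd) is torn down correctly here as well.
	 */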
kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf); + kbase_hwcnt_enable_map_free(&hlcli->enable_map); + kfree(hlcli); +} + +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump into the kernel buffer */ + errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, &hlcli->dump_buf); + if (errcode) + return errcode; + + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. + */ + kbase_hwcnt_gpu_patch_dump_headers( + &hlcli->dump_buf, &hlcli->enable_map); + + /* Zero all non-enabled counters (current values are undefined) */ + kbase_hwcnt_dump_buffer_zero_non_enabled( + &hlcli->dump_buf, &hlcli->enable_map); + + /* Copy into the user's buffer */ + errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf, + hlcli->dump_buf.metadata->dump_buf_bytes); + /* Non-zero errcode implies user buf was invalid or too small */ + if (errcode) + return -EFAULT; + + return 0; +} + +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli) +{ + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump with a NULL buffer to clear this client's counters */ + return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, NULL); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h new file mode 100644 index 000000000000..7a610ae378a2 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h @@ -0,0 +1,94 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Legacy hardware counter interface, giving userspace clients simple, + * synchronous access to hardware counters. + * + * Any functions operating on an single legacy hardware counter client instance + * must be externally synchronised. + * Different clients may safely be used concurrently. + */ + +#ifndef _KBASE_HWCNT_LEGACY_H_ +#define _KBASE_HWCNT_LEGACY_H_ + +struct kbase_hwcnt_legacy_client; +struct kbase_ioctl_hwcnt_enable; +struct kbase_hwcnt_virtualizer; + +/** + * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client. + * @hvirt: Non-NULL pointer to hardware counter virtualizer the client + * should be attached to. + * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid + * pointer to a user dump buffer large enough to hold a dump, and + * the counters that should be enabled. + * @out_hlcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. 
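 *
 * A rough usage sketch (error handling elided; hvirt is assumed to be a
 * valid virtualizer and enable a populated ioctl structure):
 *
 *   struct kbase_hwcnt_legacy_client *cli;
 *
 *   kbase_hwcnt_legacy_client_create(hvirt, &enable, &cli);
 *   kbase_hwcnt_legacy_client_dump(cli);
 *   kbase_hwcnt_legacy_client_destroy(cli);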
+ */ +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli); + +/** + * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter + * client. + * @hlcli: Pointer to the legacy hardware counter client. + * + * Will safely destroy a client in any partial state of construction. + */ +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the + * client's user buffer. + * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously dump hardware counters into the user buffer + * specified on client creation, with the counters specified on client creation. + * + * The counters are automatically cleared after each dump, such that the next + * dump performed will return the counter values accumulated between the time of + * this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter + * dump. + * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously clear the hardware counters, such that the + * next dump performed will return the counter values accumulated between the + * time of this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli); + +#endif /* _KBASE_HWCNT_LEGACY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c new file mode 100644 index 000000000000..1e9efde97c59 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c @@ -0,0 +1,538 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" + +/* Minimum alignment of each block of hardware counters */ +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ + (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +/** + * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. + * @value: The value to align upwards. + * @alignment: The alignment. + * + * Return: A number greater than or equal to value that is aligned to alignment. 
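 *
 * For example, KBASE_HWCNT_ALIGN_UPWARDS(13, 8) evaluates to 16, while an
 * already aligned value such as KBASE_HWCNT_ALIGN_UPWARDS(16, 8) is left
 * unchanged.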
+ */ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ + (value + ((alignment - (value % alignment)) % alignment)) + +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **out_metadata) +{ + char *buf; + struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_group_metadata *grp_mds; + size_t grp; + size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count; /* Number of u32 values (inc padding) */ + size_t avail_mask_bits; /* Number of availability mask bits */ + + size_t size; + size_t offset; + + if (!desc || !out_metadata) + return -EINVAL; + + /* Calculate the bytes needed to tightly pack the metadata */ + + /* Top level metadata */ + size = 0; + size += sizeof(struct kbase_hwcnt_metadata); + + /* Group metadata */ + size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + /* Block metadata */ + for (grp = 0; grp < desc->grp_cnt; grp++) { + size += sizeof(struct kbase_hwcnt_block_metadata) * + desc->grps[grp].blk_cnt; + } + + /* Single allocation for the entire metadata */ + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Use the allocated memory for the metadata and its members */ + + /* Bump allocate the top level metadata */ + offset = 0; + metadata = (struct kbase_hwcnt_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_metadata); + + /* Bump allocate the group metadata */ + grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + enable_map_count = 0; + dump_buf_count = 0; + avail_mask_bits = 0; + + for (grp = 0; grp < desc->grp_cnt; grp++) { + size_t blk; + + const struct kbase_hwcnt_group_description *grp_desc = + desc->grps + grp; + struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; + + size_t group_enable_map_count = 0; + size_t group_dump_buffer_count = 0; + size_t group_avail_mask_bits = 0; + + /* Bump allocate this group's block metadata */ + struct kbase_hwcnt_block_metadata *blk_mds = + (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * + grp_desc->blk_cnt; + + /* Fill in each block in the group's information */ + for (blk = 0; blk < grp_desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = + grp_desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = + blk_mds + blk; + const size_t n_values = + blk_desc->hdr_cnt + blk_desc->ctr_cnt; + + blk_md->type = blk_desc->type; + blk_md->inst_cnt = blk_desc->inst_cnt; + blk_md->hdr_cnt = blk_desc->hdr_cnt; + blk_md->ctr_cnt = blk_desc->ctr_cnt; + blk_md->enable_map_index = group_enable_map_count; + blk_md->enable_map_stride = + kbase_hwcnt_bitfield_count(n_values); + blk_md->dump_buf_index = group_dump_buffer_count; + blk_md->dump_buf_stride = + KBASE_HWCNT_ALIGN_UPWARDS( + n_values, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + blk_md->avail_mask_index = group_avail_mask_bits; + + group_enable_map_count += + blk_md->enable_map_stride * blk_md->inst_cnt; + group_dump_buffer_count += + blk_md->dump_buf_stride * blk_md->inst_cnt; + group_avail_mask_bits += blk_md->inst_cnt; + } + + /* Fill in the group's information */ + grp_md->type = grp_desc->type; + grp_md->blk_cnt = grp_desc->blk_cnt; + grp_md->blk_metadata = blk_mds; + grp_md->enable_map_index = enable_map_count; + grp_md->dump_buf_index = dump_buf_count; + grp_md->avail_mask_index = avail_mask_bits; + + enable_map_count 
+= group_enable_map_count; + dump_buf_count += group_dump_buffer_count; + avail_mask_bits += group_avail_mask_bits; + } + + /* Fill in the top level metadata's information */ + metadata->grp_cnt = desc->grp_cnt; + metadata->grp_metadata = grp_mds; + metadata->enable_map_bytes = + enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; + metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; + metadata->avail_mask = desc->avail_mask; + + WARN_ON(size != offset); + /* Due to the block alignment, there should be exactly one enable map + * bit per 4 bytes in the dump buffer. + */ + WARN_ON(metadata->dump_buf_bytes != + (metadata->enable_map_bytes * + BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + + *out_metadata = metadata; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); + +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ + kfree(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); + +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map) +{ + u64 *enable_map_buf; + + if (!metadata || !enable_map) + return -EINVAL; + + enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + + enable_map->metadata = metadata; + enable_map->enable_map = enable_map_buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); + +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) +{ + if (!enable_map) + return; + + kfree(enable_map->enable_map); + enable_map->enable_map = NULL; + enable_map->metadata = NULL; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); + +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + u32 *buf; + + if (!metadata || !dump_buf) + return -EINVAL; + + buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dump_buf->metadata = metadata; + dump_buf->dump_buf = buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); + +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) +{ + if (!dump_buf) + return; + + kfree(dump_buf->dump_buf); + memset(dump_buf, 0, sizeof(*dump_buf)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); + +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + struct kbase_hwcnt_dump_buffer *buffers; + size_t buf_idx; + unsigned int order; + unsigned long addr; + + if (!metadata || !dump_bufs) + return -EINVAL; + + /* Allocate memory for the dump buffer struct array */ + buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); + if (!buffers) + return -ENOMEM; + + /* Allocate pages for the actual dump buffers, as they tend to be fairly + * large. 
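	 * A single __get_free_pages() allocation of order
	 * get_order(metadata->dump_buf_bytes * n) backs all of the buffers;
	 * each buffer is then pointed at its byte offset within those pages.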
+ */ + order = get_order(metadata->dump_buf_bytes * n); + addr = __get_free_pages(GFP_KERNEL, order); + + if (!addr) { + kfree(buffers); + return -ENOMEM; + } + + dump_bufs->page_addr = addr; + dump_bufs->page_order = order; + dump_bufs->buf_cnt = n; + dump_bufs->bufs = buffers; + + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { + const size_t offset = metadata->dump_buf_bytes * buf_idx; + + buffers[buf_idx].metadata = metadata; + buffers[buf_idx].dump_buf = (u32 *)(addr + offset); + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); + +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + if (!dump_bufs) + return; + + kfree(dump_bufs->bufs); + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); + +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); + +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst) +{ + if (WARN_ON(!dst)) + return; + + memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); + +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + if (kbase_hwcnt_metadata_block_instance_avail( + metadata, grp, blk, blk_inst)) { + /* Block available, so only zero non-enabled values */ + kbase_hwcnt_dump_buffer_block_zero_non_enabled( + dst_blk, blk_em, val_cnt); + } else { + /* Block not available, so zero the entire thing */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); + +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, 
blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); + +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); + +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t hdr_cnt; + size_t ctr_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); + +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, 
blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); + + kbase_hwcnt_dump_buffer_block_accumulate_strict( + dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h new file mode 100644 index 000000000000..4d78c8457574 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h @@ -0,0 +1,1087 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter types. + * Contains structures for describing the physical layout of hardware counter + * dump buffers and enable maps within a system. + * + * Also contains helper functions for manipulation of these dump buffers and + * enable maps. + * + * Through use of these structures and functions, hardware counters can be + * enabled, copied, accumulated, and generally manipulated in a generic way, + * regardless of the physical counter dump layout. + * + * Terminology: + * + * Hardware Counter System: + * A collection of hardware counter groups, making a full hardware counter + * system. + * Hardware Counter Group: + * A group of Hardware Counter Blocks (e.g. a t62x might have more than one + * core group, so has one counter group per core group, where each group + * may have a different number and layout of counter blocks). + * Hardware Counter Block: + * A block of hardware counters (e.g. shader block, tiler block). + * Hardware Counter Block Instance: + * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have + * 4 shader block instances). + * + * Block Header: + * A header value inside a counter block. Headers don't count anything, + * so it is only valid to copy or zero them. Headers are always the first + * values in the block. + * Block Counter: + * A counter value inside a counter block. 
Counters can be zeroed, copied, + * or accumulated. Counters are always immediately after the headers in the + * block. + * Block Value: + * A catch-all term for block headers and block counters. + * + * Enable Map: + * An array of u64 bitfields, where each bit either enables exactly one + * block value, or is unused (padding). + * Dump Buffer: + * An array of u32 values, where each u32 corresponds either to one block + * value, or is unused (padding). + * Availability Mask: + * A bitfield, where each bit corresponds to whether a block instance is + * physically available (e.g. an MP3 GPU may have a sparse core mask of + * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the + * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this + * case, the availability mask might be 0b1011111 (the exact layout will + * depend on the specific hardware architecture), with the 3 extra early bits + * corresponding to other block instances in the hardware counter system). + * Metadata: + * Structure describing the physical layout of the enable map and dump buffers + * for a specific hardware counter system. + * + */ + +#ifndef _KBASE_HWCNT_TYPES_H_ +#define _KBASE_HWCNT_TYPES_H_ + +#include +#include +#include +#include +#include "mali_malisw.h" + +/* Number of bytes in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) + +/* Number of bits in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) + +/* Number of bytes for each counter value */ +#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32)) + +/* Number of bits in an availability mask (i.e. max total number of block + * instances supported in a Hardware Counter System) + */ +#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + +/** + * struct kbase_hwcnt_block_description - Description of one or more identical, + * contiguous, Hardware Counter Blocks. + * @type: The arbitrary identifier used to identify the type of the block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + */ +struct kbase_hwcnt_block_description { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; +}; + +/** + * struct kbase_hwcnt_group_description - Description of one or more identical, + * contiguous Hardware Counter Groups. + * @type: The arbitrary identifier used to identify the type of the group. + * @blk_cnt: The number of types of Hardware Counter Block in the group. + * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each type of Hardware Counter Block in the group. + */ +struct kbase_hwcnt_group_description { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; +}; + +/** + * struct kbase_hwcnt_description - Description of a Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, + * describing each Hardware Counter Group in the system. + * @avail_mask: Flat Availability Mask for all block instances in the system. + */ +struct kbase_hwcnt_description { + size_t grp_cnt; + const struct kbase_hwcnt_group_description *grps; + u64 avail_mask; +}; + +/** + * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout + * of a block in a Hardware Counter System's + * Dump Buffers and Enable Maps. 
+ * @type: The arbitrary identifier used to identify the type of the + * block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Map bitfields of the Block Instances described by + * this metadata start. + * @enable_map_stride: Stride in u64s between the Enable Maps of each of the + * Block Instances described by this metadata. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the Block Instances described by this + * metadata start. + * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the + * Block Instances described by this metadata. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the Block Instances described + * by this metadata start. + */ +struct kbase_hwcnt_block_metadata { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; + size_t enable_map_index; + size_t enable_map_stride; + size_t dump_buf_index; + size_t dump_buf_stride; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout + * of a group of blocks in a Hardware + * Counter System's Dump Buffers and Enable + * Maps. + * @type: The arbitrary identifier used to identify the type of the + * group. + * @blk_cnt: The number of types of Hardware Counter Block in the + * group. + * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, + * describing the physical layout of each type of Hardware + * Counter Block in the group. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Maps of the blocks within the group described by + * this metadata start. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the blocks within the group described by + * metadata start. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the blocks within the group + * described by this metadata start. + */ +struct kbase_hwcnt_group_metadata { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_metadata *blk_metadata; + size_t enable_map_index; + size_t dump_buf_index; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_metadata - Metadata describing the physical layout + * of Dump Buffers and Enable Maps within a + * Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, + * describing the physical layout of each Hardware Counter + * Group in the system. + * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. + * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. + * @avail_mask: The Availability Mask for the system. + */ +struct kbase_hwcnt_metadata { + size_t grp_cnt; + const struct kbase_hwcnt_group_metadata *grp_metadata; + size_t enable_map_bytes; + size_t dump_buf_bytes; + u64 avail_mask; +}; + +/** + * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 + * bitfields. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the enable map. 
+ * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array + * of u64 bitfields, each bit of which enables one hardware + * counter. + */ +struct kbase_hwcnt_enable_map { + const struct kbase_hwcnt_metadata *metadata; + u64 *enable_map; +}; + +/** + * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32 + * values. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the Dump Buffer. + * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array + * of u32 values. + */ +struct kbase_hwcnt_dump_buffer { + const struct kbase_hwcnt_metadata *metadata; + u32 *dump_buf; +}; + +/** + * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. + * @page_addr: Address of allocated pages. A single allocation is used for all + * Dump Buffers in the array. + * @page_order: The allocation order of the pages. + * @buf_cnt: The number of allocated Dump Buffers. + * @bufs: Non-NULL pointer to the array of Dump Buffers. + */ +struct kbase_hwcnt_dump_buffer_array { + unsigned long page_addr; + unsigned int page_order; + size_t buf_cnt; + struct kbase_hwcnt_dump_buffer *bufs; +}; + +/** + * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object + * from a description. + * @desc: Non-NULL pointer to a hardware counter description. + * @metadata: Non-NULL pointer to where created metadata will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **metadata); + +/** + * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. + * @metadata: Pointer to hardware counter metadata + */ +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_metadata_group_count() - Get the number of groups. + * @metadata: Non-NULL pointer to metadata. + * + * Return: Number of hardware counter groups described by metadata. + */ +#define kbase_hwcnt_metadata_group_count(metadata) \ + ((metadata)->grp_cnt) + +/** + * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Type of the group grp. + */ +#define kbase_hwcnt_metadata_group_type(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].type) + +/** + * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Number of blocks in group grp. + */ +#define kbase_hwcnt_metadata_block_count(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].blk_cnt) + +/** + * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Type of the block blk in group grp. + */ +#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type) + +/** + * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of + * a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of instances of block blk in group grp. 
+ */ +#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt) + +/** + * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter + * headers. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counter headers in each instance of block blk in + * group grp. + */ +#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) + +/** + * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counters in each instance of block blk in group + * grp. + */ +#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) + +/** + * kbase_hwcnt_metadata_block_values_count() - Get the number of values. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 headers plus counters in each instance of block blk + * in group grp. + */ +#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ + (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \ + + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk))) + +/** + * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in + * the metadata. + * @md: Non-NULL pointer to metadata. + * @grp: size_t variable used as group iterator. + * @blk: size_t variable used as block iterator. + * @blk_inst: size_t variable used as block instance iterator. + * + * Iteration order is group, then block, then block instance (i.e. linearly + * through memory). + */ +#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ + for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ + for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++) + +/** + * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail + * mask corresponding to the block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: The bit index into the avail mask for the block. + */ +static inline size_t kbase_hwcnt_metadata_block_avail_bit( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk) +{ + const size_t bit = + metadata->grp_metadata[grp].avail_mask_index + + metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; + + return bit; +} + +/** + * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is + * available. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if the block instance is available, else false. 
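 *
 * For example, with an availability mask of 0b1011 and a block whose avail
 * bit index is 0, instances 0, 1 and 3 are available while instance 2 is
 * not.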
+ */ +static inline bool kbase_hwcnt_metadata_block_instance_avail( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t bit = kbase_hwcnt_metadata_block_avail_bit( + metadata, grp, blk) + blk_inst; + const u64 mask = 1ull << bit; + + return (metadata->avail_mask & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. + * @metadata: Non-NULL pointer to metadata describing the system. + * @enable_map: Non-NULL pointer to enable map to be initialised. Will be + * initialised to all zeroes (i.e. all counters disabled). + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_free() - Free an enable map. + * @enable_map: Enable map to be freed. + * + * Can be safely called on an all-zeroed enable map structure, or on an already + * freed enable map. + */ +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block + * instance's enable map. + * @map: Non-NULL pointer to (const) enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u64* to the bitfield(s) used as the enable map for the + * block instance. + */ +#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ + ((map)->enable_map + \ + (map)->metadata->grp_metadata[(grp)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) + +/** + * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * to have at minimum one bit per value. + * @val_cnt: Number of values. + * + * Return: Number of required bitfields. + */ +static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) +{ + return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / + KBASE_HWCNT_BITFIELD_BITS; +} + +/** + * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_disable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); +} + +/** + * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. + * @dst: Non-NULL pointer to enable map to zero. + */ +static inline void kbase_hwcnt_enable_map_disable_all( + struct kbase_hwcnt_enable_map *dst) +{ + memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. 
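 *
 * Any padding bits in the final bitfield (bits beyond the block's value
 * count) are written as zero.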
+ */ +static inline void kbase_hwcnt_enable_map_block_enable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + block_enable_map[bitfld_idx] = block_enable_map_mask; + } +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in an enable + * map. + * @dst: Non-NULL pointer to enable map. + */ +static inline void kbase_hwcnt_enable_map_enable_all( + struct kbase_hwcnt_enable_map *dst) +{ + size_t grp, blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all( + dst, grp, blk, blk_inst); +} + +/** + * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_copy( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + memcpy(dst->enable_map, + src->enable_map, + dst->metadata->enable_map_bytes); +} + +/** + * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_union( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const size_t bitfld_count = + dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; + size_t i; + + for (i = 0; i < bitfld_count; i++) + dst->enable_map[i] |= src->enable_map[i]; +} + +/** + * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block + * instance are enabled. + * @enable_map: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if any values in the block are enabled, else false. 
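 *
 * Padding bits in the final bitfield (bits beyond the block's value count)
 * are masked off, so stray bits there cannot cause a false positive.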
+ */ +static inline bool kbase_hwcnt_enable_map_block_enabled( + const struct kbase_hwcnt_enable_map *enable_map, + size_t grp, + size_t blk, + size_t blk_inst) +{ + bool any_enabled = false; + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + enable_map->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + any_enabled = any_enabled || + (block_enable_map[bitfld_idx] & block_enable_map_mask); + } + + return any_enabled; +} + +/** + * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. + * @enable_map: Non-NULL pointer to enable map. + * + * Return: true if any values are enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_any_enabled( + const struct kbase_hwcnt_enable_map *enable_map) +{ + size_t grp, blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block( + enable_map->metadata, grp, blk, blk_inst) { + if (kbase_hwcnt_enable_map_block_enabled( + enable_map, grp, blk, blk_inst)) + return true; + } + + return false; +} + +/** + * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block + * instance is enabled. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to check in the block instance. + * + * Return: true if the value was enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_block_value_enabled( + const u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + return (bitfld[idx] & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to enable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_enable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] |= mask; +} + +/** + * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to disable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_disable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] &= ~mask; +} + +/** + * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. + * @metadata: Non-NULL pointer to metadata describing the system. + * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be + * initialised to undefined values, so must be used as a copy dest, + * or cleared before use. 
+ * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. + * @dump_buf: Dump buffer to be freed. + * + * Can be safely called on an all-zeroed dump buffer structure, or on an already + * freed dump buffer. + */ +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. + * @metadata: Non-NULL pointer to metadata describing the system. + * @n: Number of dump buffers to allocate + * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each + * dump buffer in the array will be initialised to undefined values, + * so must be used as a copy dest, or cleared before use. + * + * A single contiguous page allocation will be used for all of the buffers + * inside the array, where: + * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. + * @dump_bufs: Dump buffer array to be freed. + * + * Can be safely called on an all-zeroed dump buffer array structure, or on an + * already freed dump buffer array. + */ +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block + * instance's dump buffer. + * @buf: Non-NULL pointer to (const) dump buffer. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u32* to the dump buffer for the block instance. + */ +#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \ + ((buf)->dump_buf + \ + (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst)) + +/** + * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero( + u32 *dst_blk, + size_t val_cnt) +{ + memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. + * After the operation, all values + * (including padding bytes) will be + * zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dump buffer. 
+ */ +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst); + +/** + * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in + * dst (including padding bytes and + * unavailable blocks). + * After the operation, all enabled + * values will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled + * values in a block. + * After the operation, all + * enabled values will be + * unchanged. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled( + u32 *dst_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) + dst_blk[val] = 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy( + u32 *dst_blk, + const u32 *src_blk, + size_t val_cnt) +{ + /* Copy all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to + * dst. + * After the operation, all non-enabled + * values (including padding bytes) will + * be zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values + * from src to dst. + * After the operation, all + * non-enabled values will be + * zero. 
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + * + * After the copy, any disabled values in dst will be zero. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, val); + + dst_blk[val] = val_enabled ? src_blk[val] : 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and + * accumulate all enabled counters from + * src to dst. + * After the operation, all non-enabled + * values will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and + * accumulate all block counters + * from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate( + u32 *dst_blk, + const u32 *src_blk, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + /* Copy all the headers in the block instance. + * Values of non-enabled headers are undefined. + */ + memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); + + /* Accumulate all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = accumulated; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and + * accumulate all enabled counters + * from src to dst. + * After the operation, all + * non-enabled values (including + * padding bytes) will be zero. + * Slower than the non-strict + * variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. 
+ */ +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block + * headers and accumulate + * all block counters from + * src to dst. + * After the operation, all + * non-enabled values will + * be zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, hdr_cnt); + + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, ctr); + + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = ctr_enabled ? accumulated : 0; + } +} + +#endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c new file mode 100644 index 000000000000..26e9852177cb --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c @@ -0,0 +1,688 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include <linux/mutex.h> +#include <linux/slab.h> + +/** + * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. + * @hctx: Hardware counter context being virtualized. + * @metadata: Hardware counter metadata. + * @lock: Lock acquired at all entrypoints, to protect mutable state. + * @client_count: Current number of virtualizer clients. + * @clients: List of virtualizer clients. + * @accum: Hardware counter accumulator. NULL if no clients. + * @scratch_map: Enable map used as scratch space during counter changes. + * @scratch_buf: Dump buffer used as scratch space during dumps.
+ */ +struct kbase_hwcnt_virtualizer { + struct kbase_hwcnt_context *hctx; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t client_count; + struct list_head clients; + struct kbase_hwcnt_accumulator *accum; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer scratch_buf; +}; + +/** + * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. + * @node: List node used for virtualizer client list. + * @hvirt: Hardware counter virtualizer. + * @enable_map: Enable map with client's current enabled counters. + * @accum_buf: Dump buffer with client's current accumulated counters. + * @has_accum: True if accum_buf contains any accumulated counters. + * @ts_start_ns: Counter collection start time of current dump. + */ +struct kbase_hwcnt_virtualizer_client { + struct list_head node; + struct kbase_hwcnt_virtualizer *hvirt; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool has_accum; + u64 ts_start_ns; +}; + +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return NULL; + + return hvirt->metadata; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata); + +/** + * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. + * @hvcli: Pointer to virtualizer client. + * + * Will safely free a client in any partial state of construction. + */ +static void kbasep_hwcnt_virtualizer_client_free( + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); + kbase_hwcnt_enable_map_free(&hvcli->enable_map); + kfree(hvcli); +} + +/** + * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer + * client. + * @metadata: Non-NULL pointer to counter metadata. + * @out_hvcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_client_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli = NULL; + + WARN_ON(!metadata); + WARN_ON(!out_hvcli); + + hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); + if (!hvcli) + return -ENOMEM; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); + if (errcode) + goto error; + + *out_hvcli = hvcli; + return 0; +error: + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a + * client's accumulation buffer. + * @hvcli: Non-NULL pointer to virtualizer client. + * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. 
+ */ +static void kbasep_hwcnt_virtualizer_client_accumulate( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_dump_buffer *dump_buf) +{ + WARN_ON(!hvcli); + WARN_ON(!dump_buf); + lockdep_assert_held(&hvcli->hvirt->lock); + + if (hvcli->has_accum) { + /* If already some accumulation, accumulate */ + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } else { + /* If no accumulation, copy */ + kbase_hwcnt_dump_buffer_copy( + &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } + hvcli->has_accum = true; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter + * accumulator after final client + * removal. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Will safely terminate the accumulator in any partial state of initialisation. + */ +static void kbasep_hwcnt_virtualizer_accumulator_term( + struct kbase_hwcnt_virtualizer *hvirt) +{ + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + + kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); + kbase_hwcnt_enable_map_free(&hvirt->scratch_map); + kbase_hwcnt_accumulator_release(hvirt->accum); + hvirt->accum = NULL; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter + * accumulator before first client + * addition. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_accumulator_init( + struct kbase_hwcnt_virtualizer *hvirt) +{ + int errcode; + + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + WARN_ON(hvirt->accum); + + errcode = kbase_hwcnt_accumulator_acquire( + hvirt->hctx, &hvirt->accum); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hvirt->metadata, &hvirt->scratch_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hvirt->metadata, &hvirt->scratch_buf); + if (errcode) + goto error; + + return 0; +error: + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to add. + * @enable_map: Non-NULL pointer to client's initial enable map. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_virtualizer_client_add( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->client_count == 0) + /* First client added, so initialise the accumulator */ + errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); + if (errcode) + return errcode; + + hvirt->client_count += 1; + + if (hvirt->client_count == 1) { + /* First client, so just pass the enable map onwards as is */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + enable_map, &ts_start_ns, &ts_end_ns, NULL); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy( + &hvirt->scratch_map, enable_map); + list_for_each_entry(pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into only existing clients' accumulation bufs */ + if (!errcode) + list_for_each_entry(pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + } + if (errcode) + goto error; + + list_add(&hvcli->node, &hvirt->clients); + hvcli->hvirt = hvirt; + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + hvcli->has_accum = false; + hvcli->ts_start_ns = ts_end_ns; + + return 0; +error: + hvirt->client_count -= 1; + if (hvirt->client_count == 0) + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to remove. + */ +static void kbasep_hwcnt_virtualizer_client_remove( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + lockdep_assert_held(&hvirt->lock); + + list_del(&hvcli->node); + hvirt->client_count -= 1; + + if (hvirt->client_count == 0) { + /* Last client removed, so terminate the accumulator */ + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); + list_for_each_entry(pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into remaining clients' accumulation bufs */ + if (!errcode) + list_for_each_entry(pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + } + WARN_ON(errcode); +} + +/** + * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, + * and enable a new set of + * counters that will be used for + * subsequent dumps. 
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(enable_map->metadata != hvirt->metadata); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); + list_for_each_entry(pos, &hvirt->clients, node) + /* Ignore the enable map of the selected client */ + if (pos != hvcli) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, ts_start_ns, ts_end_ns, + &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry(pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, src, &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if ((enable_map->metadata != hvirt->metadata) || + (dump_buf && (dump_buf->metadata != hvirt->metadata))) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
+ */ + errcode = kbase_hwcnt_accumulator_set_counters( + hvirt->accum, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy( + &hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_set_counters( + hvirt, hvcli, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters); + +/** + * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Perform the dump */ + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, + ts_start_ns, ts_end_ns, &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry(pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, src, &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if (dump_buf && (dump_buf->metadata != hvirt->metadata)) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
+ */ + errcode = kbase_hwcnt_accumulator_dump( + hvirt->accum, ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_dump( + hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump); + +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli; + + if (!hvirt || !enable_map || !out_hvcli || + (enable_map->metadata != hvirt->metadata)) + return -EINVAL; + + errcode = kbasep_hwcnt_virtualizer_client_alloc( + hvirt->metadata, &hvcli); + if (errcode) + return errcode; + + mutex_lock(&hvirt->lock); + + errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); + + mutex_unlock(&hvirt->lock); + + if (errcode) { + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; + } + + *out_hvcli = hvcli; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create); + +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + mutex_lock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); + + mutex_unlock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_free(hvcli); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); + +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_virtualizer **out_hvirt) +{ + struct kbase_hwcnt_virtualizer *virt; + const struct kbase_hwcnt_metadata *metadata; + + if (!hctx || !out_hvirt) + return -EINVAL; + + metadata = kbase_hwcnt_context_metadata(hctx); + if (!metadata) + return -EINVAL; + + virt = kzalloc(sizeof(*virt), GFP_KERNEL); + if (!virt) + return -ENOMEM; + + virt->hctx = hctx; + virt->metadata = metadata; + + mutex_init(&virt->lock); + INIT_LIST_HEAD(&virt->clients); + + *out_hvirt = virt; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init); + +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return; + + /* Non-zero client count implies client leak */ + if (WARN_ON(hvirt->client_count != 0)) { + struct kbase_hwcnt_virtualizer_client *pos, *n; + + list_for_each_entry_safe(pos, n, &hvirt->clients, node) + kbase_hwcnt_virtualizer_client_destroy(pos); + } + + WARN_ON(hvirt->client_count != 0); + WARN_ON(hvirt->accum); + + kfree(hvirt); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h new file mode 100644 index 000000000000..1efa81d0f64a --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h @@ -0,0 +1,139 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter virtualizer API. + * + * Virtualizes a hardware counter context, so multiple clients can access + * a single hardware counter resource as though each was the exclusive user. + */ + +#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ +#define _KBASE_HWCNT_VIRTUALIZER_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_context; +struct kbase_hwcnt_virtualizer; +struct kbase_hwcnt_virtualizer_client; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. + * @hctx: Non-NULL pointer to the hardware counter context to virtualize. + * @out_hvirt: Non-NULL pointer to where the pointer to the created virtualizer + * will be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_virtualizer **out_hvirt); + +/** + * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. + * @hvirt: Pointer to virtualizer to be terminated. + */ +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by + * the virtualizer, so related counter data + * structures can be created. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @enable_map: Non-NULL pointer to the enable map for the client. Must have the + * same metadata as the virtualizer. + * @out_hvcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli); + +/** + * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. + * @hvcli: Pointer to the hardware counter client. + */ +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli); + +/** + * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, and + * enable a new set of counters + * that will be used for + * subsequent dumps. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h index fcb9ad31e937..ccf67df923a0 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h +++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h @@ -51,9 +51,24 @@ extern "C" { * specify pseudo chunked tiler alignment for JIT allocations. * 11.7: * - Removed UMP support + * 11.8: + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags + * 11.9: + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY + * under base_mem_alloc_flags + * 11.10: + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations + * with one softjob. + * 11.11: + * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags + * 11.12: + * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS + * 11.13: + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT */ #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 7 +#define BASE_UK_VERSION_MINOR 13 /** * struct kbase_ioctl_version_check - Check version compatibility with kernel @@ -531,21 +546,6 @@ struct kbase_ioctl_fence_validate { #define KBASE_IOCTL_FENCE_VALIDATE \ _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) -/** - * struct kbase_ioctl_get_profiling_controls - Get the profiling controls - * @count: The size of @buffer in u32 words - * @buffer: The buffer to receive the profiling controls - * @padding: Padding - */ -struct kbase_ioctl_get_profiling_controls { - __u64 buffer; - __u32 count; - __u32 padding; -}; - -#define KBASE_IOCTL_GET_PROFILING_CONTROLS \ - _IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls) - /** * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel * @buffer: Pointer to the information @@ -652,6 +652,7 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { * * @in: Input parameters * @out: Output parameters + * * This structure is used when performing a call to dump GPU write fault * addresses. 
*/ @@ -673,9 +674,19 @@ union kbase_ioctl_cinstr_gwt_dump { #define KBASE_IOCTL_CINSTR_GWT_DUMP \ _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) -/* IOCTLs 36-41 are reserved */ -/* IOCTL 42 is free for use */ +/** + * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone + * + * @va_pages: Number of VA pages to reserve for EXEC_VA + */ +struct kbase_ioctl_mem_exec_init { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_EXEC_INIT \ + _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) + /*************** * test ioctls * @@ -718,8 +729,54 @@ struct kbase_ioctl_tlstream_stats { #define KBASE_IOCTL_TLSTREAM_STATS \ _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) +/** + * struct kbase_ioctl_cs_event_memory_write - Write an event memory address + * @cpu_addr: Memory address to write + * @value: Value to write + * @padding: Currently unused, must be zero + */ +struct kbase_ioctl_cs_event_memory_write { + __u64 cpu_addr; + __u8 value; + __u8 padding[7]; +}; + +/** + * union kbase_ioctl_cs_event_memory_read - Read an event memory address + * @cpu_addr: Memory address to read + * @value: Value read + * @padding: Currently unused, must be zero + * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_cs_event_memory_read { + struct { + __u64 cpu_addr; + } in; + struct { + __u8 value; + __u8 padding[7]; + } out; +}; + #endif +/* Customer extension range */ +#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) + +/* If the integration needs extra ioctl add them there + * like this: + * + * struct my_ioctl_args { + * .... + * } + * + * #define KBASE_IOCTL_MY_IOCTL \ + * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) + */ + + /********************************** * Definitions for GPU properties * **********************************/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index 71450d5ff998..97d7b43104ff 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -91,6 +91,7 @@ static int jd_run_atom(struct kbase_jd_atom *katom) } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { + kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; return 0; } @@ -200,7 +201,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st struct kbase_dma_fence_resv_info info = { .dma_fence_resv_count = 0, }; -#ifdef CONFIG_SYNC +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) /* * When both dma-buf fence and Android native sync is enabled, we * disable dma-buf fence for contexts that are using Android native @@ -208,9 +209,9 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st */ const bool implicit_sync = !kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC); -#else /* CONFIG_SYNC */ +#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/ const bool implicit_sync = true; -#endif /* CONFIG_SYNC */ +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ #endif /* CONFIG_MALI_DMA_FENCE */ struct base_external_resource *input_extres; @@ -685,13 +686,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, continue; } else if (node->core_req & BASE_JD_REQ_SOFT_JOB) { - /* If this is a fence wait soft job - * then remove it from the list of sync - * waiters. - */ - if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) - kbasep_remove_waiting_soft_job(node); - + WARN_ON(!list_empty(&node->queue)); kbase_finish_soft_job(node); } node->status = KBASE_JD_ATOM_STATE_COMPLETED; @@ -808,9 +803,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->nr_extres = user_atom->nr_extres; katom->extres = NULL; katom->device_nr = user_atom->device_nr; - katom->affinity = 0; katom->jc = user_atom->jc; - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; katom->core_req = user_atom->core_req; katom->atom_flags = 0; katom->retry_count = 0; @@ -827,6 +820,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->age = kctx->age_count++; + INIT_LIST_HEAD(&katom->queue); INIT_LIST_HEAD(&katom->jd_item); #ifdef CONFIG_MALI_DMA_FENCE kbase_fence_dep_count_set(katom, -1); @@ -923,10 +917,35 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us if (will_fail) { if (!queued) { + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + int err = kbase_prepare_soft_job(katom); + + if (err >= 0) + kbase_finish_soft_job(katom); + } + ret = jd_done_nolock(katom, NULL); goto out; } else { + + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = + BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom, NULL); + goto out; + } + } + katom->will_fail_event_code = katom->event_code; ret = false; @@ -946,32 +965,13 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; katom->sched_priority = sched_prio; - /* Create a new atom recording all dependencies it was set up with. */ + /* Create a new atom. 
*/ KBASE_TLSTREAM_TL_NEW_ATOM( katom, kbase_jd_atom_id(kctx, katom)); KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE); KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority); KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); - for (i = 0; i < 2; i++) - if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( - &katom->dep[i])) { - KBASE_TLSTREAM_TL_DEP_ATOM_ATOM( - (void *)kbase_jd_katom_dep_atom( - &katom->dep[i]), - (void *)katom); - } else if (BASE_JD_DEP_TYPE_INVALID != - user_atom->pre_dep[i].dependency_type) { - /* Resolved dependency. */ - int dep_atom_number = - user_atom->pre_dep[i].atom_id; - struct kbase_jd_atom *dep_atom = - &jctx->atoms[dep_atom_number]; - - KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM( - (void *)dep_atom, - (void *)katom); - } /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { @@ -1003,11 +1003,13 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us goto out; } - /* Reject fence wait soft-job atoms accessing external resources */ + /* Reject soft-job atom of certain types from accessing external resources */ if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && - ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT)) { + (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { dev_warn(kctx->kbdev->dev, - "Rejecting fence wait soft-job atom accessing external resources"); + "Rejecting soft-job atom accessing external resources"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; ret = jd_done_nolock(katom, NULL); goto out; @@ -1123,9 +1125,6 @@ int kbase_jd_submit(struct kbase_context *kctx, return -EINVAL; } - KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(nr_atoms, - &kctx->timeline.jd_atoms_in_flight)); - /* All atoms submitted in this call have the same flush ID */ latest_flush = kbase_backend_get_current_flush_id(kbdev); @@ -1136,9 +1135,6 @@ int kbase_jd_submit(struct kbase_context *kctx, if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) { err = -EINVAL; - KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, - atomic_sub_return(nr_atoms - i, - &kctx->timeline.jd_atoms_in_flight)); break; } @@ -1222,8 +1218,6 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbasep_js_atom_retained_state katom_retained_state; bool context_idle; base_jd_core_req core_req = katom->core_req; - u64 affinity = katom->affinity; - enum kbase_atom_coreref_state coreref_state = katom->coreref_state; /* Soft jobs should never reach this function */ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); @@ -1270,13 +1264,12 @@ void kbase_jd_done_worker(struct work_struct *data) return; } - if (katom->event_code != BASE_JD_EVENT_DONE) { - meson_gpu_fault ++; + if ((katom->event_code != BASE_JD_EVENT_DONE) && + (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) dev_err(kbdev->dev, "t6xx: GPU fault 0x%02lx from job slot %d\n", - (unsigned long)katom->event_code, - katom->slot_nr); - } + (unsigned long)katom->event_code, + katom->slot_nr); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) kbase_as_poking_timer_release_atom(kbdev, kctx, katom); @@ -1370,8 +1363,7 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&jctx->lock); } - kbase_backend_complete_wq_post_sched(kbdev, 
core_req, affinity, - coreref_state); + kbase_backend_complete_wq_post_sched(kbdev, core_req); if (context_idle) kbase_pm_context_idle(kbdev); @@ -1559,6 +1551,10 @@ void kbase_jd_zap_context(struct kbase_context *kctx) flush_workqueue(kctx->dma_fence.wq); #endif +#ifdef CONFIG_DEBUG_FS + kbase_debug_job_fault_kctx_unblock(kctx); +#endif + kbase_jm_wait_for_zero_jobs(kctx); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c index 271daef14226..7b15d8a05bfd 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -190,9 +190,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) kbasep_jd_debugfs_atom_deps(deps, atom); seq_printf(sfile, - "%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ", + "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", i, atom->core_req, atom->status, - atom->coreref_state, deps[0].type, deps[0].id, deps[1].type, deps[1].id, start_timestamp); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h index ce0cb61f8c27..697bdef4d434 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,7 @@ #include -#define MALI_JD_DEBUGFS_VERSION 2 +#define MALI_JD_DEBUGFS_VERSION 3 /* Forward declarations */ struct kbase_context; diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index def56d2fcbe7..80b6d77e2fb0 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -386,6 +386,8 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) /* Add new node and rebalance tree. 
*/ rb_link_node(&katom->runnable_tree_node, parent, new); rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_READY); } /** @@ -608,6 +610,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) struct kbasep_js_kctx_info *js_kctx_info; int js; bool update_ctx_count = false; + unsigned long flags; KBASE_DEBUG_ASSERT(kctx != NULL); @@ -623,8 +626,10 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) mutex_lock(&kbdev->js_data.queue_mutex); mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); @@ -1180,9 +1185,6 @@ bool kbasep_js_add_job(struct kbase_context *kctx, goto out_unlock; } - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY); - KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); - enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, @@ -1935,7 +1937,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev) retained = retained << 1; - if (kctx) { + if (kctx && !(kbdev->as_free & (1u << i))) { kbase_ctx_sched_retain_ctx_refcount(kctx); retained |= 1u; /* We can only cope with up to 1 privileged context - @@ -1979,14 +1981,20 @@ void kbasep_js_resume(struct kbase_device *kbdev) for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_context *kctx, *n; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); list_for_each_entry_safe(kctx, n, &kbdev->js_data.ctx_list_unpullable[js][prio], jctx.sched_info.ctx.ctx_list_entry[js]) { struct kbasep_js_kctx_info *js_kctx_info; - unsigned long flags; bool timer_sync = false; + /* Drop lock so we can take kctx mutexes */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + js_kctx_info = &kctx->jctx.sched_info; mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -2003,7 +2011,11 @@ void kbasep_js_resume(struct kbase_device *kbdev) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Take lock before accessing list again */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } } mutex_unlock(&js_devdata->queue_mutex); @@ -2247,8 +2259,6 @@ static void js_return_worker(struct work_struct *data) bool context_idle = false; unsigned long flags; base_jd_core_req core_req = katom->core_req; - u64 affinity = katom->affinity; - enum kbase_atom_coreref_state coreref_state = katom->coreref_state; KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom); @@ -2338,8 +2348,7 @@ static void js_return_worker(struct work_struct *data) kbase_js_sched_all(kbdev); - kbase_backend_complete_wq_post_sched(kbdev, core_req, affinity, - coreref_state); + kbase_backend_complete_wq_post_sched(kbdev, core_req); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) @@ -2747,12 +2756,16 @@ void kbase_js_zap_context(struct kbase_context *kctx) * handled when it leaves the runpool. 
*/ if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (!list_empty( &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init( &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* The following events require us to kill off remaining jobs * and update PM book-keeping: diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index 963cef903209..355da27edc1b 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -662,7 +662,8 @@ static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js set_bit = (u16) (1u << kctx->as_nr); - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed |= set_bit; } @@ -687,7 +688,8 @@ static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data * clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed &= clear_mask; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index 6fd908aceb66..1ff230cc222d 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -198,29 +198,6 @@ static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, stru * More commonly used public functions */ -void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx) -{ - bool runpool_state_changed = false; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* This context never submits, so don't track any scheduling attributes */ - return; - } - - /* Transfer attributes held in the context flags for contexts that have submit enabled */ - - /* ... More attributes can be added here ... 
*/ - - /* The context should not have been scheduled yet, so ASSERT if this caused - * runpool state changes (note that other threads *can't* affect the value - * of runpool_state_changed, due to how it's calculated) */ - KBASE_DEBUG_ASSERT(runpool_state_changed == false); - CSTD_UNUSED(runpool_state_changed); -} - void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { bool runpool_state_changed; diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h index be781e60c822..25fd39787c71 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,14 +45,6 @@ * @{ */ -/** - * Set the initial attributes of a context (when context create flags are set) - * - * Requires: - * - Hold the jsctx_mutex - */ -void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx); - /** * Retain all attributes of a context * diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index 7385daa42e94..052a0b368315 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -151,18 +151,19 @@ typedef u32 kbasep_js_atom_done_code; */ enum { /* - * In this mode, the context containing higher priority atoms will be - * scheduled first and also the new runnable higher priority atoms can - * preempt lower priority atoms currently running on the GPU, even if - * they belong to a different context. + * In this mode, higher priority atoms will be scheduled first, + * regardless of the context they belong to. Newly-runnable higher + * priority atoms can preempt lower priority atoms currently running on + * the GPU, even if they belong to a different context. */ KBASE_JS_SYSTEM_PRIORITY_MODE = 0, /* - * In this mode, the contexts are scheduled in round-robin fashion and - * the new runnable higher priority atoms can preempt the lower priority - * atoms currently running on the GPU, only if they belong to the same - * context. + * In this mode, the highest-priority atom will be chosen from each + * context in turn using a round-robin algorithm, so priority only has + * an effect within the context an atom belongs to. Newly-runnable + * higher priority atoms can preempt the lower priority atoms currently + * running on the GPU, but only if they belong to the same context. 
*/ KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index 1dd161b3c20f..3d0de90346de 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -42,29 +42,34 @@ #include #include -/* This function finds out which RB tree the given GPU VA region belongs to - * based on the region zone */ -static struct rb_root *kbase_reg_flags_to_rbtree(struct kbase_context *kctx, - struct kbase_va_region *reg) +/* Forward declarations */ +static void free_partial_locked(struct kbase_context *kctx, + struct kbase_mem_pool *pool, struct tagged_addr tp); + +static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) { - struct rb_root *rbtree = NULL; +#if defined(CONFIG_ARM64) + /* VA_BITS can be as high as 48 bits, but all bits are available for + * both user and kernel. + */ + size_t cpu_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + /* x86_64 can access 48 bits of VA, but the 48th is used to denote + * kernel (1) vs userspace (0), so the max here is 47. + */ + size_t cpu_va_bits = 47; +#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) + size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; +#else +#error "Unknown CPU VA width for this architecture" +#endif - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - rbtree = &kctx->reg_rbtree_custom; - break; - case KBASE_REG_ZONE_EXEC: - rbtree = &kctx->reg_rbtree_exec; - break; - case KBASE_REG_ZONE_SAME_VA: - rbtree = &kctx->reg_rbtree_same; - /* fall through */ - default: - rbtree = &kctx->reg_rbtree_same; - break; - } +#ifdef CONFIG_64BIT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + cpu_va_bits = 32; +#endif - return rbtree; + return cpu_va_bits; } /* This function finds out which RB tree the given pfn from the GPU VA belongs @@ -74,37 +79,41 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, { struct rb_root *rbtree = NULL; + /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA + * zone if this has been initialized. + */ + if (gpu_pfn >= kctx->exec_va_start) + rbtree = &kctx->reg_rbtree_exec; + else { + u64 same_va_end; + #ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) #endif /* CONFIG_64BIT */ - if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE) - rbtree = &kctx->reg_rbtree_custom; - else if (gpu_pfn >= KBASE_REG_ZONE_EXEC_BASE) - rbtree = &kctx->reg_rbtree_exec; - else - rbtree = &kctx->reg_rbtree_same; + same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; #ifdef CONFIG_64BIT - } else { - if (gpu_pfn >= kctx->same_va_end) + else + same_va_end = kctx->same_va_end; +#endif /* CONFIG_64BIT */ + + if (gpu_pfn >= same_va_end) rbtree = &kctx->reg_rbtree_custom; else rbtree = &kctx->reg_rbtree_same; } -#endif /* CONFIG_64BIT */ return rbtree; } /* This function inserts a region into the tree. 
*/ -static void kbase_region_tracker_insert(struct kbase_context *kctx, - struct kbase_va_region *new_reg) +static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) { u64 start_pfn = new_reg->start_pfn; struct rb_node **link = NULL; struct rb_node *parent = NULL; struct rb_root *rbtree = NULL; - rbtree = kbase_reg_flags_to_rbtree(kctx, new_reg); + rbtree = new_reg->rbtree; link = &(rbtree->rb_node); /* Find the right place in the tree using tree search */ @@ -129,18 +138,13 @@ static void kbase_region_tracker_insert(struct kbase_context *kctx, rb_insert_color(&(new_reg->rblink), rbtree); } -/* Find allocated region enclosing free range. */ -static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free( - struct kbase_context *kctx, u64 start_pfn, size_t nr_pages) +static struct kbase_va_region *find_region_enclosing_range_rbtree( + struct rb_root *rbtree, u64 start_pfn, size_t nr_pages) { - struct rb_node *rbnode = NULL; - struct kbase_va_region *reg = NULL; - struct rb_root *rbtree = NULL; - + struct rb_node *rbnode; + struct kbase_va_region *reg; u64 end_pfn = start_pfn + nr_pages; - rbtree = kbase_gpu_va_to_rbtree(kctx, start_pfn); - rbnode = rbtree->rb_node; while (rbnode) { @@ -163,19 +167,12 @@ static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_ return NULL; } -/* Find region enclosing given address. */ -struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr) +struct kbase_va_region *kbase_find_region_enclosing_address( + struct rb_root *rbtree, u64 gpu_addr) { + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; struct rb_node *rbnode; struct kbase_va_region *reg; - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_root *rbtree = NULL; - - KBASE_DEBUG_ASSERT(NULL != kctx); - - lockdep_assert_held(&kctx->reg_lock); - - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); rbnode = rbtree->rb_node; @@ -199,14 +196,11 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struc return NULL; } -KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); - -/* Find region with given base address */ -struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr) +/* Find region enclosing given address. 
*/ +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( + struct kbase_context *kctx, u64 gpu_addr) { u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_node *rbnode = NULL; - struct kbase_va_region *reg = NULL; struct rb_root *rbtree = NULL; KBASE_DEBUG_ASSERT(NULL != kctx); @@ -215,6 +209,18 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kba rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + return kbase_find_region_enclosing_address(rbtree, gpu_addr); +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); + +struct kbase_va_region *kbase_find_region_base_address( + struct rb_root *rbtree, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_node *rbnode = NULL; + struct kbase_va_region *reg = NULL; + rbnode = rbtree->rb_node; while (rbnode) { @@ -225,17 +231,30 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kba rbnode = rbnode->rb_right; else return reg; - } return NULL; } +/* Find region with given base address */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address( + struct kbase_context *kctx, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_root *rbtree = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + + return kbase_find_region_base_address(rbtree, gpu_addr); +} + KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); /* Find region meeting given requirements */ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( - struct kbase_context *kctx, struct kbase_va_region *reg_reqs, + struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align_offset, size_t align_mask, u64 *out_start_pfn) { @@ -245,11 +264,9 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( /* Note that this search is a linear search, as we do not have a target address in mind, so does not benefit from the rbtree search */ - rbtree = kbase_reg_flags_to_rbtree(kctx, reg_reqs); - - rbnode = rb_first(rbtree); + rbtree = reg_reqs->rbtree; - while (rbnode) { + for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { reg = rb_entry(rbnode, struct kbase_va_region, rblink); if ((reg->nr_pages >= nr_pages) && (reg->flags & KBASE_REG_FREE)) { @@ -265,6 +282,27 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( start_pfn += align_mask; start_pfn -= (start_pfn - align_offset) & (align_mask); + if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { + /* Can't end at 4GB boundary */ + if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) + start_pfn += align_offset; + + /* Can't start at 4GB boundary */ + if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) + start_pfn += align_offset; + + if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) || + !(start_pfn & BASE_MEM_PFN_MASK_4GB)) + continue; + } else if (reg_reqs->flags & + KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + u64 end_pfn = start_pfn + nr_pages - 1; + + if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != + (end_pfn & ~BASE_MEM_PFN_MASK_4GB)) + start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; + } + if ((start_pfn >= reg->start_pfn) && (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { @@ -272,7 +310,6 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( return reg; } } - rbnode = rb_next(rbnode); } return NULL; @@ -286,7 +323,7 @@ static struct kbase_va_region 
*kbase_region_tracker_find_region_meeting_reqs( * region lock held. The associated memory is not released (see * kbase_free_alloced_region). Internal use only. */ -static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg) +int kbase_remove_va_region(struct kbase_va_region *reg) { struct rb_node *rbprev; struct kbase_va_region *prev = NULL; @@ -298,7 +335,7 @@ static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_re int merged_back = 0; int err = 0; - reg_rbtree = kbase_reg_flags_to_rbtree(kctx, reg); + reg_rbtree = reg->rbtree; /* Try to merge with the previous block first */ rbprev = rb_prev(&(reg->rblink)); @@ -344,7 +381,9 @@ static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_re */ struct kbase_va_region *free_reg; - free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK); + free_reg = kbase_alloc_free_region(reg_rbtree, + reg->start_pfn, reg->nr_pages, + reg->flags & KBASE_REG_ZONE_MASK); if (!free_reg) { err = -ENOMEM; goto out; @@ -359,14 +398,21 @@ static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_re KBASE_EXPORT_TEST_API(kbase_remove_va_region); /** - * @brief Insert a VA region to the list, replacing the current at_reg. + * kbase_insert_va_region_nolock - Insert a VA region to the list, + * replacing the existing one. + * + * @new_reg: The new region to insert + * @at_reg: The region to replace + * @start_pfn: The Page Frame Number to insert at + * @nr_pages: The number of pages of the region */ -static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) +static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, + struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) { struct rb_root *reg_rbtree = NULL; int err = 0; - reg_rbtree = kbase_reg_flags_to_rbtree(kctx, at_reg); + reg_rbtree = at_reg->rbtree; /* Must be a free region */ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); @@ -390,19 +436,19 @@ static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbas KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); at_reg->nr_pages -= nr_pages; - kbase_region_tracker_insert(kctx, new_reg); + kbase_region_tracker_insert(new_reg); } /* New region replaces the end of the old one, so insert after. */ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { at_reg->nr_pages -= nr_pages; - kbase_region_tracker_insert(kctx, new_reg); + kbase_region_tracker_insert(new_reg); } /* New region splits the old one, so insert and create new */ else { struct kbase_va_region *new_front_reg; - new_front_reg = kbase_alloc_free_region(kctx, + new_front_reg = kbase_alloc_free_region(reg_rbtree, at_reg->start_pfn, start_pfn - at_reg->start_pfn, at_reg->flags & KBASE_REG_ZONE_MASK); @@ -411,8 +457,8 @@ static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbas at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; at_reg->start_pfn = start_pfn + nr_pages; - kbase_region_tracker_insert(kctx, new_front_reg); - kbase_region_tracker_insert(kctx, new_reg); + kbase_region_tracker_insert(new_front_reg); + kbase_region_tracker_insert(new_reg); } else { err = -ENOMEM; } @@ -422,21 +468,84 @@ static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbas } /** - * @brief Add a VA region to the list. 
+ * kbase_add_va_region - Add a VA region to the region list for a context. + * + * @kctx: kbase context containing the region + * @reg: the region to add + * @addr: the address to insert the region at + * @nr_pages: the number of pages in the region + * @align: the minimum alignment in pages */ int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) { - struct kbase_va_region *tmp; - u64 gpu_pfn = addr >> PAGE_SHIFT; int err = 0; + struct kbase_device *kbdev = kctx->kbdev; + int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); + int gpu_pc_bits = + kbdev->gpu_props.props.core_props.log2_program_counter_size; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); lockdep_assert_held(&kctx->reg_lock); + /* The executable allocation from the SAME_VA zone would already have an + * appropriately aligned GPU VA chosen for it. + */ + if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) { + if (cpu_va_bits > gpu_pc_bits) { + align = max(align, (size_t)((1ULL << gpu_pc_bits) + >> PAGE_SHIFT)); + } + } + + do { + err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, + align); + if (err != -ENOMEM) + break; + + /* + * If the allocation is not from the same zone as JIT + * then don't retry, we're out of VA and there is + * nothing which can be done about it. + */ + if ((reg->flags & KBASE_REG_ZONE_MASK) != + KBASE_REG_ZONE_CUSTOM_VA) + break; + } while (kbase_jit_evict(kctx)); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_add_va_region); + +/** + * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree + * + * Insert a region into the rbtree that was specified when the region was + * created. If addr is 0 a free area in the rbtree is used, otherwise the + * specified address is used. + * + * @kbdev: The kbase device + * @reg: The region to add + * @addr: The address to add the region at, or 0 to map at any available address + * @nr_pages: The size of the region in pages + * @align: The minimum alignment in pages + */ +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, + struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align) +{ + struct rb_root *rbtree = NULL; + struct kbase_va_region *tmp; + u64 gpu_pfn = addr >> PAGE_SHIFT; + int err = 0; + + rbtree = reg->rbtree; + if (!align) align = 1; @@ -444,103 +553,86 @@ int kbase_add_va_region(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(is_power_of_2(align)); KBASE_DEBUG_ASSERT(nr_pages > 0); - /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */ + /* Path 1: Map a specific address. Find the enclosing region, + * which *must* be free. 
+ */ if (gpu_pfn) { - struct device *dev = kctx->kbdev->dev; + struct device *dev = kbdev->dev; KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); - tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages); + tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, + nr_pages); if (!tmp) { dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); err = -ENOMEM; goto exit; } if (!(tmp->flags & KBASE_REG_FREE)) { - dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK); - dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); - dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align); + dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", + tmp->start_pfn, tmp->flags, + tmp->nr_pages, gpu_pfn, nr_pages); err = -ENOMEM; goto exit; } - err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages); + err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn, + nr_pages); if (err) { dev_warn(dev, "Failed to insert va region"); err = -ENOMEM; - goto exit; } - - goto exit; - } - - /* Path 2: Map any free address which meets the requirements. - * - * Depending on the zone the allocation request is for - * we might need to retry it. */ - do { + } else { + /* Path 2: Map any free address which meets the requirements. */ u64 start_pfn; size_t align_offset = align; size_t align_mask = align - 1; if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { - WARN(align > 1, - "kbase_add_va_region with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", + WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", + __func__, (unsigned long)align); align_mask = reg->extent - 1; align_offset = reg->extent - reg->initial_commit; } - tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, + tmp = kbase_region_tracker_find_region_meeting_reqs(reg, nr_pages, align_offset, align_mask, &start_pfn); if (tmp) { - err = kbase_insert_va_region_nolock(kctx, reg, tmp, - start_pfn, nr_pages); - break; + err = kbase_insert_va_region_nolock(reg, tmp, + start_pfn, nr_pages); + } else { + err = -ENOMEM; } + } - /* - * If the allocation is not from the same zone as JIT - * then don't retry, we're out of VA and there is - * nothing which can be done about it. - */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) - break; - } while (kbase_jit_evict(kctx)); - - if (!tmp) - err = -ENOMEM; - - exit: +exit: return err; } -KBASE_EXPORT_TEST_API(kbase_add_va_region); - /** * @brief Initialize the internal region tracker data structure. */ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, - struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg) { kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(kctx, same_va_reg); + kbase_region_tracker_insert(same_va_reg); - /* Although exec and custom_va_reg don't always exist, + /* Although custom_va_reg and exec_va_reg don't always exist, * initialize unconditionally because of the mem_view debugfs - * implementation which relies on these being empty + * implementation which relies on them being empty. 
+ * + * The difference between the two is that the EXEC_VA region + * is never initialized at this stage. */ - kctx->reg_rbtree_exec = RB_ROOT; kctx->reg_rbtree_custom = RB_ROOT; + kctx->reg_rbtree_exec = RB_ROOT; - if (exec_reg) - kbase_region_tracker_insert(kctx, exec_reg); if (custom_va_reg) - kbase_region_tracker_insert(kctx, custom_va_reg); + kbase_region_tracker_insert(custom_va_reg); } static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) @@ -561,43 +653,24 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); } -static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) { -#if defined(CONFIG_ARM64) - /* VA_BITS can be as high as 48 bits, but all bits are available for - * both user and kernel. - */ - size_t cpu_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - /* x86_64 can access 48 bits of VA, but the 48th is used to denote - * kernel (1) vs userspace (0), so the max here is 47. - */ - size_t cpu_va_bits = 47; -#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) - size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; -#else -#error "Unknown CPU VA width for this architecture" -#endif - -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - cpu_va_bits = 32; -#endif + kbase_region_tracker_erase_rbtree(rbtree); +} - return min(cpu_va_bits, (size_t) kctx->kbdev->gpu_props.mmu.va_bits); +static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +{ + return min(kbase_get_num_cpu_va_bits(kctx), + (size_t) kctx->kbdev->gpu_props.mmu.va_bits); } -/** - * Initialize the region tracker data structure. 
- */ int kbase_region_tracker_init(struct kbase_context *kctx) { struct kbase_va_region *same_va_reg; - struct kbase_va_region *exec_reg = NULL; struct kbase_va_region *custom_va_reg = NULL; size_t same_va_bits = kbase_get_same_va_bits(kctx); u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; @@ -610,7 +683,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(kctx, 1, + same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1, same_va_pages, KBASE_REG_ZONE_SAME_VA); @@ -620,7 +693,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) } #ifdef CONFIG_64BIT - /* 32-bit clients have exec and custom VA zones */ + /* 32-bit clients have custom VA zones */ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { @@ -634,38 +707,32 @@ int kbase_region_tracker_init(struct kbase_context *kctx) if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - exec_reg = kbase_alloc_free_region(kctx, - KBASE_REG_ZONE_EXEC_BASE, - KBASE_REG_ZONE_EXEC_SIZE, - KBASE_REG_ZONE_EXEC); - - if (!exec_reg) { - err = -ENOMEM; - goto fail_free_same_va; - } - - custom_va_reg = kbase_alloc_free_region(kctx, + custom_va_reg = kbase_alloc_free_region( + &kctx->reg_rbtree_custom, KBASE_REG_ZONE_CUSTOM_VA_BASE, custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); if (!custom_va_reg) { err = -ENOMEM; - goto fail_free_exec; + goto fail_free_same_va; } #ifdef CONFIG_64BIT + } else { + custom_va_size = 0; } #endif - kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, - custom_va_reg); + kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); kctx->same_va_end = same_va_pages + 1; + kctx->gpu_va_end = kctx->same_va_end + custom_va_size; + kctx->exec_va_start = U64_MAX; + kctx->jit_va = false; + kbase_gpu_vm_unlock(kctx); return 0; -fail_free_exec: - kbase_free_alloced_region(exec_reg); fail_free_same_va: kbase_free_alloced_region(same_va_reg); fail_unlock: @@ -681,11 +748,12 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, struct kbase_va_region *custom_va_reg; u64 same_va_bits = kbase_get_same_va_bits(kctx); u64 total_va_size; - int err; total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; - kbase_gpu_vm_lock(kctx); + /* First verify that a JIT_VA zone has not been created already. */ + if (kctx->jit_va) + return -EINVAL; /* * Modify the same VA free region after creation. Be careful to ensure @@ -694,23 +762,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, */ same_va = kbase_region_tracker_find_region_base_address(kctx, PAGE_SIZE); - if (!same_va) { - err = -ENOMEM; - goto fail_unlock; - } - - /* The region flag or region size has changed since creation so bail. 
*/ - if ((!(same_va->flags & KBASE_REG_FREE)) || - (same_va->nr_pages != total_va_size)) { - err = -ENOMEM; - goto fail_unlock; - } + if (!same_va) + return -ENOMEM; - if (same_va->nr_pages < jit_va_pages || - kctx->same_va_end < jit_va_pages) { - err = -ENOMEM; - goto fail_unlock; - } + if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) + return -ENOMEM; /* It's safe to adjust the same VA zone now */ same_va->nr_pages -= jit_va_pages; @@ -720,51 +776,129 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, * Create a custom VA zone at the end of the VA for allocations which * JIT can use so it doesn't have to allocate VA from the kernel. */ - custom_va_reg = kbase_alloc_free_region(kctx, + custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, kctx->same_va_end, jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); - if (!custom_va_reg) { - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - err = -ENOMEM; - goto fail_unlock; - } - - kbase_region_tracker_insert(kctx, custom_va_reg); + /* + * The context will be destroyed if we fail here so no point + * reverting the change we made to same_va. + */ + if (!custom_va_reg) + return -ENOMEM; - kbase_gpu_vm_unlock(kctx); + kbase_region_tracker_insert(custom_va_reg); return 0; - -fail_unlock: - kbase_gpu_vm_unlock(kctx); - return err; } #endif int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, u8 max_allocations, u8 trim_level) { + int err = 0; + if (trim_level > 100) return -EINVAL; - kctx->jit_max_allocations = max_allocations; - kctx->trim_level = trim_level; + kbase_gpu_vm_lock(kctx); #ifdef CONFIG_64BIT if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) - return kbase_region_tracker_init_jit_64(kctx, jit_va_pages); + err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); #endif /* * Nothing to do for 32-bit clients, JIT uses the existing * custom VA zone. */ - return 0; + + if (!err) { + kctx->jit_max_allocations = max_allocations; + kctx->trim_level = trim_level; + kctx->jit_va = true; + } + + kbase_gpu_vm_unlock(kctx); + + return err; } +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) +{ + struct kbase_va_region *shrinking_va_reg; + struct kbase_va_region *exec_va_reg; + u64 exec_va_start, exec_va_base_addr; + int err; + + /* The EXEC_VA zone shall be created by making space at the end of the + * address space. Firstly, verify that the number of EXEC_VA pages + * requested by the client is reasonable and then make sure that it is + * not greater than the address space itself before calculating the base + * address of the new zone. + */ + if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) + return -EINVAL; + + kbase_gpu_vm_lock(kctx); + + /* First verify that a JIT_VA zone has not been created already. 
*/ + if (kctx->jit_va) { + err = -EPERM; + goto exit_unlock; + } + + if (exec_va_pages > kctx->gpu_va_end) { + err = -ENOMEM; + goto exit_unlock; + } + + exec_va_start = kctx->gpu_va_end - exec_va_pages; + exec_va_base_addr = exec_va_start << PAGE_SHIFT; + + shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, + exec_va_base_addr); + if (!shrinking_va_reg) { + err = -ENOMEM; + goto exit_unlock; + } + + /* Make sure that the EXEC_VA region is still uninitialized */ + if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_EXEC_VA) { + err = -EPERM; + goto exit_unlock; + } + + if (shrinking_va_reg->nr_pages <= exec_va_pages) { + err = -ENOMEM; + goto exit_unlock; + } + + exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, + exec_va_start, + exec_va_pages, + KBASE_REG_ZONE_EXEC_VA); + if (!exec_va_reg) { + err = -ENOMEM; + goto exit_unlock; + } + + shrinking_va_reg->nr_pages -= exec_va_pages; +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + kctx->same_va_end -= exec_va_pages; +#endif + kctx->exec_va_start = exec_va_start; + + kbase_region_tracker_insert(exec_va_reg); + err = 0; + +exit_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + + int kbase_mem_init(struct kbase_device *kbdev) { struct kbasep_mem_device *memdev; @@ -830,15 +964,15 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. * - * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, - * or KBASE_REG_ZONE_EXEC + * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. * */ -struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) +struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone) { struct kbase_va_region *new_reg; - KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(rbtree != NULL); /* zone argument should only contain zone related region flags */ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); @@ -853,7 +987,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ - new_reg->kctx = kctx; + new_reg->rbtree = rbtree; new_reg->flags = zone | KBASE_REG_FREE; new_reg->flags |= KBASE_REG_GROWABLE; @@ -868,6 +1002,33 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 KBASE_EXPORT_TEST_API(kbase_alloc_free_region); +static struct kbase_context *kbase_reg_flags_to_kctx( + struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_exec); + break; + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + /** * @brief Free a region object. 
* @@ -881,7 +1042,13 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); void kbase_free_alloced_region(struct kbase_va_region *reg) { if (!(reg->flags & KBASE_REG_FREE)) { - mutex_lock(®->kctx->jit_evict_lock); + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; + + + mutex_lock(&kctx->jit_evict_lock); /* * The physical allocation should have been removed from the @@ -891,7 +1058,7 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) * on the list at termination time of the region tracker. */ if (!list_empty(®->gpu_alloc->evict_node)) { - mutex_unlock(®->kctx->jit_evict_lock); + mutex_unlock(&kctx->jit_evict_lock); /* * Unlink the physical allocation before unmaking it @@ -916,14 +1083,14 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) kbase_mem_evictable_unmake(reg->gpu_alloc); } } else { - mutex_unlock(®->kctx->jit_evict_lock); + mutex_unlock(&kctx->jit_evict_lock); } /* * Remove the region from the sticky resource metadata * list should it be there. */ - kbase_sticky_resource_release(reg->kctx, NULL, + kbase_sticky_resource_release(kctx, NULL, reg->start_pfn << PAGE_SHIFT); kbase_mem_phy_alloc_put(reg->cpu_alloc); @@ -944,7 +1111,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 unsigned long mask = ~KBASE_REG_MEMATTR_MASK; unsigned long gwt_mask = ~0; -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) gwt_mask = ~KBASE_REG_GPU_WR; #endif @@ -971,11 +1138,13 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { - err = kbase_mmu_insert_pages(kctx, + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, reg->start_pfn + (i * stride), alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask); + reg->flags & gwt_mask, + kctx->as_nr); if (err) goto bad_insert; @@ -992,10 +1161,13 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 } } } else { - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask); + reg->flags & gwt_mask, + kctx->as_nr); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc); @@ -1011,12 +1183,16 @@ bad_insert: KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); while (i--) if (reg->gpu_alloc->imported.alias.aliased[i].alloc) { - kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length); + kbase_mmu_teardown_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn + (i * stride), + reg->gpu_alloc->imported.alias.aliased[i].length, + kctx->as_nr); kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); } } - kbase_remove_va_region(kctx, reg); + kbase_remove_va_region(reg); return err; } @@ -1036,13 +1212,16 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { size_t i; - err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, reg->nr_pages, kctx->as_nr); 
KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) if (reg->gpu_alloc->imported.alias.aliased[i].alloc) kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); } else { - err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg)); + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, kbase_reg_current_backed_size(reg), + kctx->as_nr); kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); } @@ -1063,7 +1242,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (err) return err; - err = kbase_remove_va_region(kctx, reg); + err = kbase_remove_va_region(reg); return err; } @@ -1345,7 +1524,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re lockdep_assert_held(&kctx->reg_lock); if (reg->flags & KBASE_REG_JIT) { - dev_warn(reg->kctx->kbdev->dev, "Attempt to free JIT memory!\n"); + dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n"); return -EINVAL; } @@ -1372,7 +1551,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re err = kbase_gpu_munmap(kctx, reg); if (err) { - dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); + dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n"); goto out; } @@ -1482,7 +1661,8 @@ int kbase_update_region_flags(struct kbase_context *kctx, reg->flags |= KBASE_REG_GPU_NX; if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { - if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) + if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && + !(flags & BASE_MEM_UNCACHED_GPU)) return -EINVAL; } else if (flags & (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { @@ -1497,8 +1677,20 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASE_MEM_TILER_ALIGN_TOP) reg->flags |= KBASE_REG_TILER_ALIGN_TOP; + /* Set up default MEMATTR usage */ - if (kctx->kbdev->system_coherency == COHERENCY_ACE && + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { + if (kctx->kbdev->mmu_mode->flags & + KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + /* Override shareability, and MEMATTR for uncached */ + reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + } else { + dev_warn(kctx->kbdev->dev, + "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); + return -EINVAL; + } + } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && (reg->flags & KBASE_REG_SHARE_BOTH)) { reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); @@ -1507,6 +1699,12 @@ int kbase_update_region_flags(struct kbase_context *kctx, KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); } + if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING) + reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + + if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) + reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; + return 0; } @@ -1520,14 +1718,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, struct tagged_addr *tp; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); if (alloc->reg) { if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) goto invalid_request; } - kctx = alloc->imported.kctx; + kctx = alloc->imported.native.kctx; if (nr_pages_requested == 0) goto done; /*nothing to do*/ @@ -1563,7 +1761,7 @@ int kbase_alloc_phy_pages_helper(struct 
kbase_mem_phy_alloc *alloc, if (nr_left) { struct kbase_sub_alloc *sa, *temp_sa; - mutex_lock(&kctx->mem_partials_lock); + spin_lock(&kctx->mem_partials_lock); list_for_each_entry_safe(sa, temp_sa, &kctx->mem_partials, link) { @@ -1586,7 +1784,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, } } } - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); } /* only if we actually have a chunk left <512. If more it indicates @@ -1633,9 +1831,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, nr_left = 0; /* expose for later use */ - mutex_lock(&kctx->mem_partials_lock); + spin_lock(&kctx->mem_partials_lock); list_add(&sa->link, &kctx->mem_partials); - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); } } } @@ -1696,7 +1894,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( struct tagged_addr *new_pages = NULL; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); lockdep_assert_held(&pool->pool_lock); @@ -1709,7 +1907,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( goto invalid_request; } - kctx = alloc->imported.kctx; + kctx = alloc->imported.native.kctx; lockdep_assert_held(&kctx->mem_partials_lock); @@ -1837,14 +2035,36 @@ alloc_failed: if (nr_left != nr_pages_requested) { size_t nr_pages_to_free = nr_pages_requested - nr_left; - alloc->nents += nr_pages_to_free; + struct tagged_addr *start_free = alloc->pages + alloc->nents; - kbase_process_page_usage_inc(kctx, nr_pages_to_free); - kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - - kbase_free_phy_pages_helper(alloc, nr_pages_to_free); +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + kbase_mem_pool_free_pages_locked( + pool, 512, + start_free, + false, /* not dirty */ + true); /* return to pool */ + nr_pages_to_free -= 512; + start_free += 512; + } else if (is_partial(*start_free)) { + free_partial_locked(kctx, pool, + *start_free); + nr_pages_to_free--; + start_free++; + } + } + } else { +#endif + kbase_mem_pool_free_pages_locked(pool, + nr_pages_to_free, + start_free, + false, /* not dirty */ + true); /* return to pool */ +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif } kbase_process_page_usage_dec(kctx, nr_pages_requested); @@ -1861,10 +2081,10 @@ static void free_partial(struct kbase_context *kctx, struct tagged_addr tp) struct page *p, *head_page; struct kbase_sub_alloc *sa; - p = phys_to_page(as_phys_addr_t(tp)); + p = as_page(tp); head_page = (struct page *)p->lru.prev; sa = (struct kbase_sub_alloc *)head_page->lru.next; - mutex_lock(&kctx->mem_partials_lock); + spin_lock(&kctx->mem_partials_lock); clear_bit(p - head_page, sa->sub_pages); if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { list_del(&sa->link); @@ -1875,14 +2095,14 @@ static void free_partial(struct kbase_context *kctx, struct tagged_addr tp) /* expose the partial again */ list_add(&sa->link, &kctx->mem_partials); } - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); } int kbase_free_phy_pages_helper( struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { - struct kbase_context *kctx = alloc->imported.kctx; + struct kbase_context *kctx = alloc->imported.native.kctx; bool syncback; bool reclaimed = (alloc->evicted != 0); struct tagged_addr *start_free; @@ 
-1890,7 +2110,7 @@ int kbase_free_phy_pages_helper( size_t freed = 0; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); /* early out if nothing to do */ @@ -1976,13 +2196,13 @@ static void free_partial_locked(struct kbase_context *kctx, lockdep_assert_held(&pool->pool_lock); lockdep_assert_held(&kctx->mem_partials_lock); - p = phys_to_page(as_phys_addr_t(tp)); + p = as_page(tp); head_page = (struct page *)p->lru.prev; sa = (struct kbase_sub_alloc *)head_page->lru.next; clear_bit(p - head_page, sa->sub_pages); if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { list_del(&sa->link); - kbase_mem_pool_free(pool, head_page, true); + kbase_mem_pool_free_locked(pool, head_page, true); kfree(sa); } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == SZ_2M / SZ_4K - 1) { @@ -1995,14 +2215,14 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, struct tagged_addr *pages, size_t nr_pages_to_free) { - struct kbase_context *kctx = alloc->imported.kctx; + struct kbase_context *kctx = alloc->imported.native.kctx; bool syncback; bool reclaimed = (alloc->evicted != 0); struct tagged_addr *start_free; size_t freed = 0; KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); lockdep_assert_held(&pool->pool_lock); @@ -2093,14 +2313,26 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - if (!WARN_ON(!alloc->imported.kctx)) { + + if (!WARN_ON(!alloc->imported.native.kctx)) { + if (alloc->permanent_map) + kbase_phy_alloc_mapping_term( + alloc->imported.native.kctx, + alloc); + /* * The physical allocation must have been removed from * the eviction list before trying to free it. */ - mutex_lock(&alloc->imported.kctx->jit_evict_lock); + mutex_lock( + &alloc->imported.native.kctx->jit_evict_lock); WARN_ON(!list_empty(&alloc->evict_node)); - mutex_unlock(&alloc->imported.kctx->jit_evict_lock); + mutex_unlock( + &alloc->imported.native.kctx->jit_evict_lock); + + kbase_process_page_usage_dec( + alloc->imported.native.kctx, + alloc->imported.native.nr_struct_pages); } kbase_free_phy_pages_helper(alloc, alloc->nents); break; @@ -2132,16 +2364,11 @@ void kbase_mem_kref_free(struct kref *kref) case KBASE_MEM_TYPE_IMPORTED_USER_BUF: if (alloc->imported.user_buf.mm) mmdrop(alloc->imported.user_buf.mm); - kfree(alloc->imported.user_buf.pages); - break; - case KBASE_MEM_TYPE_TB:{ - void *tb; - - tb = alloc->imported.kctx->jctx.tb; - kbase_device_trace_buffer_uninstall(alloc->imported.kctx); - vfree(tb); + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + vfree(alloc->imported.user_buf.pages); + else + kfree(alloc->imported.user_buf.pages); break; - } default: WARN(1, "Unexecpted free of type %d\n", alloc->type); break; @@ -2220,6 +2447,14 @@ bool kbase_check_alloc_flags(unsigned long flags) BASE_MEM_TILER_ALIGN_TOP))) return false; + /* To have an allocation lie within a 4GB chunk is required only for + * TLS memory, which will never be used to contain executable code + * and also used for Tiler heap. 
+ */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) + return false; + /* GPU should have at least read or write access otherwise there is no reason for allocating. */ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) @@ -2290,9 +2525,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, return -EINVAL; } - if (va_pages > (U64_MAX / PAGE_SIZE)) { - /* 64-bit address range is the max */ - dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than 64-bit address range!", + if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { + dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", (unsigned long long)va_pages); return -ENOMEM; } @@ -2351,6 +2585,13 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, #undef KBASE_MSG_PRE_FLAG } + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && + (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", + (unsigned long long)va_pages); + return -EINVAL; + } + return 0; #undef KBASE_MSG_PRE } @@ -2687,7 +2928,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, if (reg->cpu_alloc != reg->gpu_alloc) pages_required *= 2; - mutex_lock(&kctx->mem_partials_lock); + spin_lock(&kctx->mem_partials_lock); kbase_mem_pool_lock(pool); /* As we can not allocate memory from the kernel with the vm_lock held, @@ -2699,14 +2940,14 @@ static int kbase_jit_grow(struct kbase_context *kctx, int pool_delta = pages_required - kbase_mem_pool_size(pool); kbase_mem_pool_unlock(pool); - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); kbase_gpu_vm_unlock(kctx); if (kbase_mem_pool_grow(pool, pool_delta)) goto update_failed_unlocked; kbase_gpu_vm_lock(kctx); - mutex_lock(&kctx->mem_partials_lock); + spin_lock(&kctx->mem_partials_lock); kbase_mem_pool_lock(pool); } @@ -2714,7 +2955,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, delta, &prealloc_sas[0]); if (!gpu_pages) { kbase_mem_pool_unlock(pool); - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); goto update_failed; } @@ -2727,12 +2968,12 @@ static int kbase_jit_grow(struct kbase_context *kctx, kbase_free_phy_pages_helper_locked(reg->gpu_alloc, pool, gpu_pages, delta); kbase_mem_pool_unlock(pool); - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); goto update_failed; } } kbase_mem_pool_unlock(pool); - mutex_unlock(&kctx->mem_partials_lock); + spin_unlock(&kctx->mem_partials_lock); ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, old_size); @@ -2759,6 +3000,30 @@ update_failed_unlocked: return ret; } +static void trace_jit_stats(struct kbase_context *kctx, + u32 bin_id, u32 max_allocations) +{ + const u32 alloc_count = + kctx->jit_current_allocations_per_bin[bin_id]; + + struct kbase_va_region *walker; + u32 va_pages = 0; + u32 ph_pages = 0; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + if (walker->jit_bin_id != bin_id) + continue; + + va_pages += walker->nr_pages; + ph_pages += walker->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_evict_lock); + + KBASE_TLSTREAM_AUX_JIT_STATS(kctx->id, bin_id, max_allocations, + alloc_count, va_pages, ph_pages); +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, struct base_jit_alloc_info *info) { @@ -2784,11 +3049,9 @@ struct 
kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (info->usage_id != 0) { /* First scan for an allocation with the same usage ID */ struct kbase_va_region *walker; - struct kbase_va_region *temp; size_t current_diff = SIZE_MAX; - list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, - jit_node) { + list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { if (walker->jit_usage_id == info->usage_id && walker->jit_bin_id == info->bin_id && @@ -2826,11 +3089,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, * use. Search for an allocation we can reuse. */ struct kbase_va_region *walker; - struct kbase_va_region *temp; size_t current_diff = SIZE_MAX; - list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, - jit_node) { + list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { if (walker->jit_bin_id == info->bin_id && meet_size_and_tiler_align_top_requirements( @@ -2915,6 +3176,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kctx->jit_current_allocations++; kctx->jit_current_allocations_per_bin[info->bin_id]++; + trace_jit_stats(kctx, info->bin_id, info->max_allocations); + reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; @@ -2958,6 +3221,8 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); kbase_gpu_vm_lock(kctx); @@ -2982,7 +3247,10 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) void kbase_jit_backing_lost(struct kbase_va_region *reg) { - struct kbase_context *kctx = reg->kctx; + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; lockdep_assert_held(&kctx->jit_evict_lock); @@ -3068,6 +3336,17 @@ void kbase_jit_term(struct kbase_context *kctx) cancel_work_sync(&kctx->jit_work); } +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = (kctx->exec_va_start != U64_MAX); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} + static int kbase_jd_user_buf_map(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -3151,14 +3430,14 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, } alloc->nents = pinned_pages; -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + pa, kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr); if (err == 0) return 0; @@ -3265,16 +3544,17 @@ static int kbase_jd_umm_map(struct kbase_context *kctx, /* Update nents as we now have pages to map */ alloc->nents = reg->nr_pages; -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), count, (reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) & - gwt_mask); + gwt_mask, + kctx->as_nr); if (err) goto err_unmap_attachment; @@ -3292,7 +3572,8 @@ static int kbase_jd_umm_map(struct kbase_context *kctx, return 0; 
err_teardown_orig_pages: - kbase_mmu_teardown_pages(kctx, reg->start_pfn, count); + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + count, kctx->as_nr); err_unmap_attachment: dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); @@ -3372,9 +3653,11 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages( - kctx, + kctx->kbdev, + &kctx->mmu, reg->start_pfn, - alloc->nents); + alloc->nents, + kctx->as_nr); WARN_ON(err); } @@ -3391,9 +3674,11 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, if (reg && reg->gpu_alloc == alloc) kbase_mmu_teardown_pages( - kctx, + kctx->kbdev, + &kctx->mmu, reg->start_pfn, - kbase_reg_current_backed_size(reg)); + kbase_reg_current_backed_size(reg), + kctx->as_nr); if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) writeable = false; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index e55a8fba8e8b..a873bb1d08f5 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -45,6 +45,9 @@ /* Required for kbase_mem_evictable_unmake */ #include "mali_kbase_mem_linux.h" +static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, + int pages); + /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ @@ -77,7 +80,6 @@ enum kbase_memory_type { KBASE_MEM_TYPE_IMPORTED_UMM, KBASE_MEM_TYPE_IMPORTED_USER_BUF, KBASE_MEM_TYPE_ALIAS, - KBASE_MEM_TYPE_TB, KBASE_MEM_TYPE_RAW }; @@ -125,6 +127,13 @@ struct kbase_mem_phy_alloc { /* type of buffer */ enum kbase_memory_type type; + /* Kernel side mapping of the alloc, shall never be referred directly. + * kbase_phy_alloc_mapping_get() & kbase_phy_alloc_mapping_put() pair + * should be used around access to the kernel-side CPU mapping so that + * mapping doesn't disappear whilst it is being accessed. + */ + struct kbase_vmap_struct *permanent_map; + unsigned long properties; /* member in union valid based on @a type */ @@ -142,8 +151,13 @@ struct kbase_mem_phy_alloc { size_t nents; struct kbase_aliased *aliased; } alias; - /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ - struct kbase_context *kctx; + struct { + struct kbase_context *kctx; + /* Number of pages in this structure, including *pages. + * Used for kernel memory tracking. + */ + size_t nr_struct_pages; + } native; struct kbase_alloc_import_user_buf { unsigned long address; unsigned long size; @@ -226,7 +240,7 @@ struct kbase_va_region { struct rb_node rblink; struct list_head link; - struct kbase_context *kctx; /* Backlink to base context */ + struct rb_root *rbtree; /* Backlink to rb tree */ u64 start_pfn; /* The PFN in GPU space */ size_t nr_pages; @@ -244,14 +258,18 @@ struct kbase_va_region { #define KBASE_REG_GPU_NX (1ul << 3) /* Is CPU cached? */ #define KBASE_REG_CPU_CACHED (1ul << 4) -/* Is GPU cached? */ +/* Is GPU cached? + * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. + */ #define KBASE_REG_GPU_CACHED (1ul << 5) #define KBASE_REG_GROWABLE (1ul << 6) /* Can grow on pf? 
*/ #define KBASE_REG_PF_GROW (1ul << 7) -/* Bit 8 is unused */ +/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ +#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) /* inner shareable coherency */ #define KBASE_REG_SHARE_IN (1ul << 9) @@ -291,32 +309,33 @@ struct kbase_va_region { /* Memory is handled by JIT - user space should not be able to free it */ #define KBASE_REG_JIT (1ul << 24) +/* Memory has permanent kernel side mapping */ +#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ /* - * On a 32bit platform, custom VA should be wired from (4GB + shader region) + * On a 32bit platform, custom VA should be wired from 4GB * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). * So we put the default limit to the maximum possible on Linux and shrink * it down, if required by the GPU, during initialization. */ -/* - * Dedicated 16MB region for shader code: - * VA range 0x101000000-0x102000000 - */ -#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) -#define KBASE_REG_ZONE_EXEC_BASE (0x101000000ULL >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) - -#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) -/* Starting after KBASE_REG_ZONE_EXEC */ -#define KBASE_REG_ZONE_CUSTOM_VA_BASE \ - (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) +#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) #define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) /* end 32-bit clients only */ +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. 
+ */ +#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ + + unsigned long flags; size_t extent; /* nr of pages alloc'd on PF */ @@ -371,7 +390,9 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) #define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ -static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type) +static inline struct kbase_mem_phy_alloc *kbase_alloc_create( + struct kbase_context *kctx, size_t nr_pages, + enum kbase_memory_type type) { struct kbase_mem_phy_alloc *alloc; size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; @@ -401,6 +422,13 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en if (!alloc) return ERR_PTR(-ENOMEM); + if (type == KBASE_MEM_TYPE_NATIVE) { + alloc->imported.native.nr_struct_pages = + (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + kbase_process_page_usage_inc(kctx, + alloc->imported.native.nr_struct_pages); + } + /* Store allocation method */ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; @@ -427,23 +455,23 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, KBASE_DEBUG_ASSERT(!reg->gpu_alloc); KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); - reg->cpu_alloc = kbase_alloc_create(reg->nr_pages, + reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, KBASE_MEM_TYPE_NATIVE); if (IS_ERR(reg->cpu_alloc)) return PTR_ERR(reg->cpu_alloc); else if (!reg->cpu_alloc) return -ENOMEM; - reg->cpu_alloc->imported.kctx = kctx; + reg->cpu_alloc->imported.native.kctx = kctx; if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) && (reg->flags & KBASE_REG_CPU_CACHED)) { - reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, + reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, KBASE_MEM_TYPE_NATIVE); if (IS_ERR_OR_NULL(reg->gpu_alloc)) { kbase_mem_phy_alloc_put(reg->cpu_alloc); return -ENOMEM; } - reg->gpu_alloc->imported.kctx = kctx; + reg->gpu_alloc->imported.native.kctx = kctx; } else { reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); } @@ -771,23 +799,75 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); */ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); +/** + * kbase_region_tracker_init - Initialize the region tracker data structure + * @kctx: kbase context + * + * Return: 0 if success, negative error code otherwise. + */ int kbase_region_tracker_init(struct kbase_context *kctx); + +/** + * kbase_region_tracker_init_jit - Initialize the JIT region + * @kctx: kbase context + * @jit_va_pages: Size of the JIT region in pages + * @max_allocations: Maximum number of allocations allowed for the JIT region + * @trim_level: Trim level for the JIT region + * + * Return: 0 if success, negative error code otherwise. + */ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, u8 max_allocations, u8 trim_level); + +/** + * kbase_region_tracker_init_exec - Initialize the EXEC_VA region + * @kctx: kbase context + * @exec_va_pages: Size of the JIT region in pages. + * It must not be greater than 4 GB. + * + * Return: 0 if success, negative error code otherwise. 
+ */ +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); + +/** + * kbase_region_tracker_term - Terminate the JIT region + * @kctx: kbase context + */ void kbase_region_tracker_term(struct kbase_context *kctx); -struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); +/** + * kbase_region_tracker_term_rbtree - Free memory for a region tracker + * + * This will free all the regions within the region tracker + * + * @rbtree: Region tracker tree root + */ +void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); + +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( + struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_find_region_enclosing_address( + struct rb_root *rbtree, u64 gpu_addr); /** * @brief Check that a pointer is actually a valid region. * * Must be called with context lock held. */ -struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_region_tracker_find_region_base_address( + struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, + u64 gpu_addr); -struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone); +struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone); void kbase_free_alloced_region(struct kbase_va_region *reg); -int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); +int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align); +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, + struct kbase_va_region *reg, u64 addr, size_t nr_pages, + size_t align); +int kbase_remove_va_region(struct kbase_va_region *reg); bool kbase_check_alloc_flags(unsigned long flags); bool kbase_check_import_flags(unsigned long flags); @@ -831,25 +911,44 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx); int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); -int kbase_mmu_init(struct kbase_context *kctx); -void kbase_mmu_term(struct kbase_context *kctx); - -phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); -void kbase_mmu_free_pgd(struct kbase_context *kctx); -int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags); -int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags); +/** + * kbase_mmu_init - Initialise an object representing GPU page tables + * + * The structure should be terminated using kbase_mmu_term() + * + * @kbdev: kbase device + * @mmut: structure to initialise + * @kctx: optional kbase context, may be NULL if this set of MMU tables is not + * associated with a context + */ +int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct kbase_context *kctx); +/** + * kbase_mmu_term - Terminate an object representing GPU page tables + * + * This will free any page tables that have been allocated + * + * @kbdev: kbase device + * @mmut: kbase_mmu_table to be destroyed + */ +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); + +int kbase_mmu_insert_pages_no_flush(struct kbase_device 
*kbdev, + struct kbase_mmu_table *mmut, + const u64 start_vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags); -int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr); -int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags); +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + size_t nr, int as_nr); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags); @@ -869,11 +968,19 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); /** + * kbase_mmu_update - Configure an address space on the GPU to the specified + * MMU tables + * * The caller has the following locking conditions: * - It must hold kbase_device->mmu_hw_mutex * - It must hold the hwaccess_lock + * + * @kbdev: Kbase device structure + * @mmut: The set of MMU tables to be configured on the address space + * @as_nr: The address space to be configured */ -void kbase_mmu_update(struct kbase_context *kctx); +void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int as_nr); /** * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. @@ -1046,6 +1153,8 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase * Note : The caller must not hold vm_lock, as this could cause a deadlock if * the kernel OoM killer runs. If the caller must allocate pages while holding * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + * + * This function cannot be used from interrupt context */ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); @@ -1056,7 +1165,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * @pool: Memory pool to allocate from * @nr_pages_requested: number of physical pages to allocate * @prealloc_sa: Information about the partial allocation if the amount - * of memory requested is not a multiple of 2MB. + * of memory requested is not a multiple of 2MB. One + * instance of struct kbase_sub_alloc must be allocated by + * the caller iff CONFIG_MALI_2MB_ALLOC is enabled. * * Allocates \a nr_pages_requested and updates the alloc object. This function * does not allocate new pages from the kernel, and therefore will never trigger @@ -1083,10 +1194,13 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * allocation can complete without another thread using the newly grown pages. * * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then - * @pool must be alloc->imported.kctx->lp_mem_pool. Otherwise it must be - * alloc->imported.kctx->mem_pool. - * - * @prealloc_sa shall be set to NULL if it has been consumed by this function. + * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be + * alloc->imported.native.kctx->mem_pool. + * @prealloc_sa is used to manage the non-2MB sub-allocation. 
It has to be + * pre-allocated because we must not sleep (due to the usage of kmalloc()) + * whilst holding pool->pool_lock. + * @prealloc_sa shall be set to NULL if it has been consumed by this function + * to indicate that the caller must not free it. * * Return: Pointer to array of allocated pages. NULL on failure. * @@ -1156,17 +1270,17 @@ static inline void kbase_clear_dma_addr(struct page *p) } /** -* @brief Process a bus or page fault. -* -* This function will process a fault on a specific address space -* -* @param[in] kbdev The @ref kbase_device the fault happened on -* @param[in] kctx The @ref kbase_context for the faulting address space if -* one was found. -* @param[in] as The address space that has the fault -*/ + * kbase_mmu_interrupt_process - Process a bus or page fault. + * @kbdev The kbase_device the fault happened on + * @kctx The kbase_context for the faulting address space if one was found. + * @as The address space that has the fault + * @fault Data relating to the fault + * + * This function will process a fault on a specific address space + */ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as); + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault); /** * @brief Process a page fault. @@ -1272,6 +1386,18 @@ bool kbase_jit_evict(struct kbase_context *kctx); */ void kbase_jit_term(struct kbase_context *kctx); +/** + * kbase_has_exec_va_zone - EXEC_VA zone predicate + * + * Determine whether an EXEC_VA zone has been created for the GPU address space + * of the given kbase context. + * + * @kctx: kbase context + * + * Return: True if the kbase context has an EXEC_VA zone. + */ +bool kbase_has_exec_va_zone(struct kbase_context *kctx); + /** * kbase_map_external_resource - Map an external resource to the GPU. * @kctx: kbase context. @@ -1358,4 +1484,5 @@ static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) */ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); + #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index 59cc03538f1b..c70112d275f0 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -47,18 +47,193 @@ #include #include -#include #include #include + +static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, + struct kbase_vmap_struct *map); +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + struct kbase_vmap_struct *map); + static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); +/* Retrieve the associated region pointer if the GPU address corresponds to + * one of the event memory pages. The enclosing region, if found, shouldn't + * have been marked as free. + */ +static struct kbase_va_region *kbase_find_event_mem_region( + struct kbase_context *kctx, u64 gpu_addr) +{ + + return NULL; +} + +/** + * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping + * of the physical allocation belonging to a + * region + * @kctx: The kernel base context @reg belongs to. 
+ * @reg: The region whose physical allocation is to be mapped + * @vsize: The size of the requested region, in pages + * @size: The size in pages initially committed to the region + * + * Return: 0 on success, otherwise an error code indicating failure + * + * Maps the physical allocation backing a non-free @reg, so it may be + * accessed directly from the kernel. This is only supported for physical + * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of + * physical allocation. + * + * The mapping is stored directly in the allocation that backs @reg. The + * refcount is not incremented at this point. Instead, use of the mapping should + * be surrounded by kbase_phy_alloc_mapping_get() and + * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the + * client is accessing it. + * + * Both cached and uncached regions are allowed, but any sync operations are the + * responsibility of the client using the permanent mapping. + * + * A number of checks are made to ensure that a region that needs a permanent + * mapping can actually be supported: + * - The region must be created as fully backed + * - The region must not be growable + * + * This function will fail if those checks are not satisfied. + * + * On success, the region will also be forced into a certain kind: + * - It will no longer be growable + */ +static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t vsize, size_t size) +{ + size_t size_bytes = (size << PAGE_SHIFT); + struct kbase_vmap_struct *kern_mapping; + int err = 0; + + /* Can only map in regions that are always fully committed + * Don't setup the mapping twice + * Only support KBASE_MEM_TYPE_NATIVE allocations + */ + if (vsize != size || reg->cpu_alloc->permanent_map != NULL || + reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + return -EINVAL; + + if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - + kctx->permanent_mapped_pages)) { + dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %lu pages", + (u64)size, + KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, + kctx->permanent_mapped_pages); + return -ENOMEM; + } + + kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); + if (!kern_mapping) + return -ENOMEM; + + err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping); + if (err < 0) + goto vmap_fail; + + /* No support for growing or shrinking mapped regions */ + reg->flags &= ~KBASE_REG_GROWABLE; + + reg->cpu_alloc->permanent_map = kern_mapping; + kctx->permanent_mapped_pages += size; + + return 0; +vmap_fail: + kfree(kern_mapping); + return err; +} + +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + WARN_ON(!alloc->permanent_map); + kbase_vunmap_phy_pages(kctx, alloc->permanent_map); + kfree(alloc->permanent_map); + + alloc->permanent_map = NULL; + + /* Mappings are only done on cpu_alloc, so don't need to worry about + * this being reduced a second time if a separate gpu_alloc is + * freed + */ + WARN_ON(alloc->nents > kctx->permanent_mapped_pages); + kctx->permanent_mapped_pages -= alloc->nents; +} + +void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, + u64 gpu_addr, + struct kbase_vmap_struct **out_kern_mapping) +{ + struct kbase_va_region *reg; + void *kern_mem_ptr = NULL; + struct kbase_vmap_struct *kern_mapping; + u64 mapping_offset; + + WARN_ON(!kctx); + WARN_ON(!out_kern_mapping); + + kbase_gpu_vm_lock(kctx); + + /* First do 
a quick lookup in the list of event memory regions */ + reg = kbase_find_event_mem_region(kctx, gpu_addr); + + if (!reg) { + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + } + + if (reg == NULL || (reg->flags & KBASE_REG_FREE) != 0) + goto out_unlock; + + kern_mapping = reg->cpu_alloc->permanent_map; + if (kern_mapping == NULL) + goto out_unlock; + + mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + + /* Refcount the allocations to prevent them disappearing */ + WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc); + WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc); + (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc); + (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc); + + kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset); + *out_kern_mapping = kern_mapping; +out_unlock: + kbase_gpu_vm_unlock(kctx); + return kern_mem_ptr; +} + +void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, + struct kbase_vmap_struct *kern_mapping) +{ + WARN_ON(!kctx); + WARN_ON(!kern_mapping); + + WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx); + WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map); + + kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc); + kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc); + + /* kern_mapping and the gpu/cpu phy allocs backing it must not be used + * from now on + */ +} + struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va) { int zone; struct kbase_va_region *reg; + struct rb_root *rbtree; struct device *dev; KBASE_DEBUG_ASSERT(kctx); @@ -75,6 +250,21 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto bad_flags; } +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { + /* Mask coherency flags if infinite cache is enabled to prevent + * the skipping of syncs from BASE side. + */ + *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | + BASE_MEM_COHERENT_SYSTEM); + } +#endif + + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && !kbase_device_is_cpu_coherent(kctx->kbdev)) { dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); @@ -90,14 +280,18 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto bad_sizes; /* find out which VA zone to use */ - if (*flags & BASE_MEM_SAME_VA) + if (*flags & BASE_MEM_SAME_VA) { + rbtree = &kctx->reg_rbtree_same; zone = KBASE_REG_ZONE_SAME_VA; - else if (*flags & BASE_MEM_PROT_GPU_EX) - zone = KBASE_REG_ZONE_EXEC; - else + } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { + rbtree = &kctx->reg_rbtree_exec; + zone = KBASE_REG_ZONE_EXEC_VA; + } else { + rbtree = &kctx->reg_rbtree_custom; zone = KBASE_REG_ZONE_CUSTOM_VA; + } - reg = kbase_alloc_free_region(kctx, 0, va_pages, zone); + reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone); if (!reg) { dev_err(dev, "Failed to allocate free region"); goto no_region; @@ -129,6 +323,21 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); + if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { + /* Permanent kernel mappings must happen as soon as + * reg->cpu_alloc->pages is ready. 
Currently this happens after + * kbase_alloc_phy_pages(). If we move that to setup pages + * earlier, also move this call too + */ + int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, + commit_pages); + if (err < 0) { + kbase_gpu_vm_unlock(kctx); + goto no_kern_mapping; + } + } + + /* mmap needed to setup VA? */ if (*flags & BASE_MEM_SAME_VA) { unsigned long prot = PROT_NONE; @@ -197,6 +406,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, no_mmap: no_cookie: +no_kern_mapping: no_mem: kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); @@ -279,6 +489,10 @@ int kbase_mem_query(struct kbase_context *kctx, } if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) *out |= BASE_MEM_TILER_ALIGN_TOP; + if (!(KBASE_REG_GPU_CACHED & reg->flags)) + *out |= BASE_MEM_UNCACHED_GPU; + if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) + *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); @@ -441,7 +655,7 @@ void kbase_mem_evictable_deinit(struct kbase_context *kctx) */ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) { - struct kbase_context *kctx = alloc->imported.kctx; + struct kbase_context *kctx = alloc->imported.native.kctx; int __maybe_unused new_page_count; kbase_process_page_usage_dec(kctx, alloc->nents); @@ -461,7 +675,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) static void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) { - struct kbase_context *kctx = alloc->imported.kctx; + struct kbase_context *kctx = alloc->imported.native.kctx; int __maybe_unused new_page_count; new_page_count = kbase_atomic_add_pages(alloc->nents, @@ -480,7 +694,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) { - struct kbase_context *kctx = gpu_alloc->imported.kctx; + struct kbase_context *kctx = gpu_alloc->imported.native.kctx; lockdep_assert_held(&kctx->reg_lock); @@ -505,7 +719,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) { - struct kbase_context *kctx = gpu_alloc->imported.kctx; + struct kbase_context *kctx = gpu_alloc->imported.native.kctx; int err = 0; lockdep_assert_held(&kctx->reg_lock); @@ -619,6 +833,12 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) goto out_unlock; + /* shareability flags are ignored for GPU uncached memory */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { + ret = 0; + goto out_unlock; + } + /* no change? 
*/ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { ret = 0; @@ -697,15 +917,21 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, + 0, *va_pages, KBASE_REG_ZONE_SAME_VA); } else { - reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, + 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); } if (!reg) goto no_region; - reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM); + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto invalid_flags; + + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, + KBASE_MEM_TYPE_IMPORTED_UMM); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -714,13 +940,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, /* No pages to map yet */ reg->gpu_alloc->nents = 0; - if (kbase_update_region_flags(kctx, reg, *flags) != 0) - goto invalid_flags; - reg->flags &= ~KBASE_REG_FREE; reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ - reg->flags |= KBASE_REG_GPU_CACHED; if (*flags & BASE_MEM_SECURE) reg->flags |= KBASE_REG_SECURE; @@ -737,10 +959,8 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, return reg; -invalid_flags: - kbase_mem_phy_alloc_put(reg->gpu_alloc); - kbase_mem_phy_alloc_put(reg->cpu_alloc); no_alloc_obj: +invalid_flags: kfree(reg); no_region: bad_size: @@ -752,11 +972,11 @@ no_buf: } #endif /* CONFIG_DMA_SHARED_BUFFER */ -static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx) +u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev) { u32 cpu_cache_line_size = cache_line_size(); u32 gpu_cache_line_size = - (1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size); + (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); return ((cpu_cache_line_size > gpu_cache_line_size) ? 
cpu_cache_line_size : @@ -769,15 +989,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( { long i; struct kbase_va_region *reg; + struct rb_root *rbtree; long faulted_pages; int zone = KBASE_REG_ZONE_CUSTOM_VA; bool shared_zone = false; - u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx); + u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; if ((address & (cache_line_alignment - 1)) != 0 || (size & (cache_line_alignment - 1)) != 0) { + if (*flags & BASE_MEM_UNCACHED_GPU) { + dev_warn(kctx->kbdev->dev, + "User buffer is not cache line aligned and marked as GPU uncached\n"); + goto bad_size; + } + /* Coherency must be enabled to handle partial cache lines */ if (*flags & (BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { @@ -820,14 +1047,16 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; zone = KBASE_REG_ZONE_SAME_VA; - } + rbtree = &kctx->reg_rbtree_same; + } else + rbtree = &kctx->reg_rbtree_custom; - reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); + reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone); if (!reg) goto no_region; - reg->gpu_alloc = kbase_alloc_create(*va_pages, + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -852,8 +1081,11 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( #else mmgrab(current->mm); #endif - user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *), - GFP_KERNEL); + if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); + else + user_buf->pages = kmalloc_array(*va_pages, + sizeof(struct page *), GFP_KERNEL); if (!user_buf->pages) goto no_page_array; @@ -965,7 +1197,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* mask to only allowed flags */ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | - BASE_MEM_COHERENT_SYSTEM_REQUIRED); + BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { dev_warn(kctx->kbdev->dev, @@ -994,21 +1226,23 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx, 0, *num_pages, - KBASE_REG_ZONE_SAME_VA); + reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, + *num_pages, + KBASE_REG_ZONE_SAME_VA); } else { #else if (1) { #endif - reg = kbase_alloc_free_region(kctx, 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); + reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, + 0, *num_pages, + KBASE_REG_ZONE_CUSTOM_VA); } if (!reg) goto no_reg; /* zero-sized page array, as we don't need one/can support one */ - reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS); + reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS); if (IS_ERR_OR_NULL(reg->gpu_alloc)) goto no_alloc_obj; @@ -1052,6 +1286,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, goto bad_handle; /* Free region */ if (aliasing_reg->flags & KBASE_REG_DONT_NEED) goto bad_handle; /* Ephemeral region */ + if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) + goto bad_handle; /* GPU uncached memory */ if 
(!aliasing_reg->gpu_alloc) goto bad_handle; /* No alloc */ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) @@ -1159,6 +1395,11 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, goto bad_flags; } + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && !kbase_device_is_cpu_coherent(kctx->kbdev)) { dev_warn(kctx->kbdev->dev, @@ -1284,8 +1525,8 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr); return ret; } @@ -1312,8 +1553,8 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, u64 delta = old_pages - new_pages; int ret = 0; - ret = kbase_mmu_teardown_pages(kctx, - reg->start_pfn + new_pages, delta); + ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn + new_pages, delta, kctx->as_nr); return ret; } @@ -1545,12 +1786,19 @@ const struct vm_operations_struct kbase_vm_ops = { .fault = kbase_cpu_vm_fault }; -static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close) +static int kbase_cpu_mmap(struct kbase_context *kctx, + struct kbase_va_region *reg, + struct vm_area_struct *vma, + void *kaddr, + size_t nr_pages, + unsigned long aligned_offset, + int free_on_close) { struct kbase_cpu_mapping *map; struct tagged_addr *page_array; int err = 0; int i; + u64 start_off; map = kzalloc(sizeof(*map), GFP_KERNEL); @@ -1583,6 +1831,38 @@ static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vm vma->vm_private_data = map; page_array = kbase_get_cpu_phy_pages(reg); + start_off = vma->vm_pgoff - reg->start_pfn + + (aligned_offset >> PAGE_SHIFT); + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { + struct kbase_aliased *aliased = + reg->cpu_alloc->imported.alias.aliased; + + if (!reg->cpu_alloc->imported.alias.stride || + reg->nr_pages < (start_off + nr_pages)) { + err = -EINVAL; + goto out; + } + + while (start_off >= reg->cpu_alloc->imported.alias.stride) { + aliased++; + start_off -= reg->cpu_alloc->imported.alias.stride; + } + + if (!aliased->alloc) { + /* sink page not available for dumping map */ + err = -EINVAL; + goto out; + } + + if ((start_off + nr_pages) > aliased->length) { + /* not fully backed by physical pages */ + err = -EINVAL; + goto out; + } + + /* ready the pages for dumping map */ + page_array = aliased->alloc->pages + aliased->offset; + } if (!(reg->flags & KBASE_REG_CPU_CACHED) && (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { @@ -1597,8 +1877,6 @@ static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vm if (!kaddr) { unsigned long addr = vma->vm_start + aligned_offset; - u64 start_off = vma->vm_pgoff - reg->start_pfn + - (aligned_offset>>PAGE_SHIFT); vma->vm_flags |= VM_PFNMAP; for (i = 0; i < nr_pages; i++) { @@ -1627,7 +1905,7 @@ static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vm map->region = reg; map->free_on_close = free_on_close; - 
map->kctx = reg->kctx; + map->kctx = kctx; map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); map->count = 1; /* start with one ref */ @@ -1640,91 +1918,6 @@ static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vm return err; } -static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr) -{ - struct kbase_va_region *new_reg; - u32 nr_pages; - size_t size; - int err = 0; - u32 *tb; - int owns_tb = 1; - - dev_dbg(kctx->kbdev->dev, "in %s\n", __func__); - size = (vma->vm_end - vma->vm_start); - nr_pages = size >> PAGE_SHIFT; - - if (!kctx->jctx.tb) { - KBASE_DEBUG_ASSERT(0 != size); - tb = vmalloc_user(size); - - if (NULL == tb) { - err = -ENOMEM; - goto out; - } - - err = kbase_device_trace_buffer_install(kctx, tb, size); - if (err) { - vfree(tb); - goto out; - } - } else { - err = -EINVAL; - goto out; - } - - *kaddr = kctx->jctx.tb; - - new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); - if (!new_reg) { - err = -ENOMEM; - WARN_ON(1); - goto out_no_region; - } - - new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB); - if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { - err = -ENOMEM; - new_reg->cpu_alloc = NULL; - WARN_ON(1); - goto out_no_alloc; - } - - new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); - - new_reg->cpu_alloc->imported.kctx = kctx; - new_reg->flags &= ~KBASE_REG_FREE; - new_reg->flags |= KBASE_REG_CPU_CACHED; - - /* alloc now owns the tb */ - owns_tb = 0; - - if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { - err = -ENOMEM; - WARN_ON(1); - goto out_no_va_region; - } - - *reg = new_reg; - - /* map read only, noexec */ - vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); - /* the rest of the flags is added by the cpu_mmap handler */ - - dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); - return 0; - -out_no_va_region: -out_no_alloc: - kbase_free_alloced_region(new_reg); -out_no_region: - if (owns_tb) { - kbase_device_trace_buffer_uninstall(kctx); - vfree(tb); - } -out: - return err; -} - static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr) { struct kbase_va_region *new_reg; @@ -1744,14 +1937,15 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct goto out; } - new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); + new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages, + KBASE_REG_ZONE_SAME_VA); if (!new_reg) { err = -ENOMEM; WARN_ON(1); goto out; } - new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW); + new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW); if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { err = -ENOMEM; new_reg->cpu_alloc = NULL; @@ -1918,14 +2112,6 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) /* Illegal handle for direct map */ err = -EINVAL; goto out_unlock; - case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE): - err = kbase_trace_buffer_mmap(kctx, vma, ®, &kaddr); - if (0 != err) - goto out_unlock; - dev_dbg(dev, "kbase_trace_buffer_mmap ok\n"); - /* free the region on munmap */ - free_on_close = 1; - break; case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): /* MMU dump */ err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); @@ -1983,8 +2169,19 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) } #endif /* CONFIG_DMA_SHARED_BUFFER */ - /* limit what we map to the 
amount currently backed */ - if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + /* initial params check for aliased dumping map */ + if (nr_pages > reg->gpu_alloc->imported.alias.stride || + !reg->gpu_alloc->imported.alias.stride || + !nr_pages) { + err = -EINVAL; + dev_warn(dev, "mmap aliased: invalid params!\n"); + goto out_unlock; + } + } + else if (reg->cpu_alloc->nents < + (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + /* limit what we map to the amount currently backed */ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) nr_pages = 0; else @@ -1997,7 +2194,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) } /* default */ } /* switch */ - err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); + err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, + free_on_close); if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on @@ -2016,11 +2214,11 @@ out: KBASE_EXPORT_TEST_API(kbase_mmap); -static void kbasep_sync_mem_regions(struct kbase_context *kctx, +void kbase_sync_mem_regions(struct kbase_context *kctx, struct kbase_vmap_struct *map, enum kbase_sync_type dest) { size_t i; - off_t const offset = (uintptr_t)map->gpu_addr & ~PAGE_MASK; + off_t const offset = map->offset_in_page; size_t const page_count = PFN_UP(offset + map->size); /* Sync first page */ @@ -2046,66 +2244,55 @@ static void kbasep_sync_mem_regions(struct kbase_context *kctx, } } -void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, - unsigned long prot_request, struct kbase_vmap_struct *map) +static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, + struct kbase_vmap_struct *map) { - struct kbase_va_region *reg; unsigned long page_index; - unsigned int offset = gpu_addr & ~PAGE_MASK; - size_t page_count = PFN_UP(offset + size); + unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; + size_t page_count = PFN_UP(offset_in_page + size); struct tagged_addr *page_array; struct page **pages; void *cpu_addr = NULL; pgprot_t prot; size_t i; - if (!size || !map) - return NULL; + if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) + return -EINVAL; /* check if page_count calculation will wrap */ if (size > ((size_t)-1 / PAGE_SIZE)) - return NULL; - - kbase_gpu_vm_lock(kctx); - - reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) - goto out_unlock; + return -EINVAL; - page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn; + page_index = offset_bytes >> PAGE_SHIFT; /* check if page_index + page_count will wrap */ if (-1UL - page_count < page_index) - goto out_unlock; + return -EINVAL; if (page_index + page_count > kbase_reg_current_backed_size(reg)) - goto out_unlock; + return -ENOMEM; if (reg->flags & KBASE_REG_DONT_NEED) - goto out_unlock; + return -EINVAL; - /* check access permissions can be satisfied - * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ - if ((reg->flags & prot_request) != prot_request) - goto out_unlock; + prot = PAGE_KERNEL; + if (!(reg->flags & KBASE_REG_CPU_CACHED)) { + /* Map uncached */ + prot = pgprot_writecombine(prot); + } page_array = kbase_get_cpu_phy_pages(reg); if (!page_array) - goto out_unlock; + return -ENOMEM; pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); if (!pages) - goto out_unlock; + return 
-ENOMEM; for (i = 0; i < page_count; i++) - pages[i] = phys_to_page(as_phys_addr_t(page_array[page_index + - i])); + pages[i] = as_page(page_array[page_index + i]); - prot = PAGE_KERNEL; - if (!(reg->flags & KBASE_REG_CPU_CACHED)) { - /* Map uncached */ - prot = pgprot_writecombine(prot); - } /* Note: enforcing a RO prot_request onto prot is not done, since: * - CPU-arch-specific integration required * - kbase_vmap() requires no access checks to be made/enforced */ @@ -2115,26 +2302,66 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, kfree(pages); if (!cpu_addr) - goto out_unlock; + return -ENOMEM; - map->gpu_addr = gpu_addr; - map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + map->offset_in_page = offset_in_page; + map->cpu_alloc = reg->cpu_alloc; map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; - map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + map->gpu_alloc = reg->gpu_alloc; map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; - map->addr = (void *)((uintptr_t)cpu_addr + offset); + map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); map->size = size; map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) && !kbase_mem_is_imported(map->gpu_alloc->type); if (map->sync_needed) - kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); - kbase_gpu_vm_unlock(kctx); + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); - return map->addr; + return 0; +} + +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map) +{ + struct kbase_va_region *reg; + void *addr = NULL; + u64 offset_bytes; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + int err; + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) + goto out_unlock; + + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} + */ + if ((reg->flags & prot_request) != prot_request) + goto out_unlock; + + offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map); + if (err < 0) + goto fail_vmap_phy_pages; + + addr = map->addr; out_unlock: kbase_gpu_vm_unlock(kctx); + return addr; + +fail_vmap_phy_pages: + kbase_gpu_vm_unlock(kctx); + kbase_mem_phy_alloc_put(cpu_alloc); + kbase_mem_phy_alloc_put(gpu_alloc); + return NULL; } @@ -2150,22 +2377,29 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, } KBASE_EXPORT_TEST_API(kbase_vmap); -void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + struct kbase_vmap_struct *map) { void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); vunmap(addr); if (map->sync_needed) - kbasep_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); - map->gpu_addr = 0; - map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); - map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + + map->offset_in_page = 0; map->cpu_pages = NULL; map->gpu_pages = NULL; map->addr = NULL; map->size = 0; map->sync_needed = false; } + +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) +{ + kbase_vunmap_phy_pages(kctx, map); + 
map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); + map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); +} KBASE_EXPORT_TEST_API(kbase_vunmap); void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) @@ -2250,133 +2484,4 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ return 0; } -void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle) -{ - int res; - void *va; - dma_addr_t dma_pa; - struct kbase_va_region *reg; - struct tagged_addr *page_array; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - unsigned long attrs = DMA_ATTR_WRITE_COMBINE; -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - DEFINE_DMA_ATTRS(attrs); -#endif - - u32 pages = ((size - 1) >> PAGE_SHIFT) + 1; - u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | - BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR; - u32 i; - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(0 != size); - KBASE_DEBUG_ASSERT(0 != pages); - - if (size == 0) - goto err; - - /* All the alloc calls return zeroed memory */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, - attrs); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, - &attrs); -#else - va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); -#endif - if (!va) - goto err; - - /* Store the state so we can free it later. */ - handle->cpu_va = va; - handle->dma_pa = dma_pa; - handle->size = size; - - - reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA); - if (!reg) - goto no_reg; - - reg->flags &= ~KBASE_REG_FREE; - if (kbase_update_region_flags(kctx, reg, flags) != 0) - goto invalid_flags; - - reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW); - if (IS_ERR_OR_NULL(reg->cpu_alloc)) - goto no_alloc; - - reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - - page_array = kbase_get_cpu_phy_pages(reg); - - for (i = 0; i < pages; i++) - page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT)); - - reg->cpu_alloc->nents = pages; - - kbase_gpu_vm_lock(kctx); - res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1); - kbase_gpu_vm_unlock(kctx); - if (res) - goto no_mmap; - - return va; - -no_mmap: - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); -no_alloc: -invalid_flags: - kfree(reg); -no_reg: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); -#else - dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); -#endif -err: - return NULL; -} -KBASE_EXPORT_SYMBOL(kbase_va_alloc); - -void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle) -{ - struct kbase_va_region *reg; - int err; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - DEFINE_DMA_ATTRS(attrs); -#endif - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(handle->cpu_va != NULL); - - kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va); - KBASE_DEBUG_ASSERT(reg); - err = kbase_gpu_munmap(kctx, reg); - kbase_gpu_vm_unlock(kctx); - KBASE_DEBUG_ASSERT(!err); - - 
kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); - kfree(reg); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) - dma_free_attrs(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - dma_free_attrs(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa, &attrs); -#else - dma_free_writecombine(kctx->kbdev->dev, handle->size, - handle->cpu_va, handle->dma_pa); -#endif -} -KBASE_EXPORT_SYMBOL(kbase_va_free); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index a14826ebc772..5cb88d19426a 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -206,7 +206,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); struct kbase_vmap_struct { - u64 gpu_addr; + off_t offset_in_page; struct kbase_mem_phy_alloc *cpu_alloc; struct kbase_mem_phy_alloc *gpu_alloc; struct tagged_addr *cpu_pages; @@ -303,23 +303,21 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, */ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); -/** @brief Allocate memory from kernel space and map it onto the GPU - * - * @param kctx The context used for the allocation/mapping - * @param size The size of the allocation in bytes - * @param handle An opaque structure used to contain the state needed to free the memory - * @return the VA for kernel space and GPU MMU - */ -void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle); +extern const struct vm_operations_struct kbase_vm_ops; -/** @brief Free/unmap memory allocated by kbase_va_alloc - * - * @param kctx The context used for the allocation/mapping - * @param handle An opaque structure returned by the kbase_va_alloc function. +/** + * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode + * CPU mapping. + * @kctx: Context the CPU mapping belongs to. + * @map: Structure describing the CPU mapping, setup previously by the + * kbase_vmap() call. + * @dest: Indicates the type of maintenance required (i.e. flush or invalidate) + * + * Note: The caller shall ensure that CPU mapping is not revoked & remains + * active whilst the maintenance is in progress. */ -void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle); - -extern const struct vm_operations_struct kbase_vm_ops; +void kbase_sync_mem_regions(struct kbase_context *kctx, + struct kbase_vmap_struct *map, enum kbase_sync_type dest); /** * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation @@ -352,4 +350,94 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_pages, u64 old_pages); +/** + * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a + * physical allocation + * @kctx: The kernel base context associated with the mapping + * @alloc: Pointer to the allocation to terminate + * + * This function will unmap the kernel mapping, and free any structures used to + * track it. 
+ */ +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent + * mapping of a physical allocation + * @kctx: The kernel base context @gpu_addr will be looked up in + * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping + * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer + * which will be used for a call to + * kbase_phy_alloc_mapping_put() + * + * Return: Pointer to a kernel-side accessible location that directly + * corresponds to @gpu_addr, or NULL on failure + * + * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access + * that location kernel-side. Only certain kinds of memory have a permanent + * kernel mapping, refer to the internal functions + * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more + * information. + * + * If this function succeeds, a CPU access to the returned pointer will access + * the actual location represented by @gpu_addr. That is, the return value does + * not require any offset added to it to access the location specified in + * @gpu_addr + * + * The client must take care to either apply any necessary sync operations when + * accessing the data, or ensure that the enclosing region was coherent with + * the GPU, or uncached in the CPU. + * + * The refcount on the physical allocations backing the region are taken, so + * that they do not disappear whilst the client is accessing it. Once the + * client has finished accessing the memory, it must be released with a call to + * kbase_phy_alloc_mapping_put() + * + * Whilst this is expected to execute quickly (the mapping was already setup + * when the physical allocation was created), the call is not IRQ-safe due to + * the region lookup involved. + * + * An error code may indicate that: + * - a userside process has freed the allocation, and so @gpu_addr is no longer + * valid + * - the region containing @gpu_addr does not support a permanent kernel mapping + */ +void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, + struct kbase_vmap_struct **out_kern_mapping); + +/** + * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a + * physical allocation + * @kctx: The kernel base context associated with the mapping + * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained + * from a call to kbase_phy_alloc_mapping_get() + * + * Releases the reference to the allocations backing @kern_mapping that was + * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used + * when the client no longer needs to access the kernel-side CPU pointer. + * + * If this was the last reference on the underlying physical allocations, they + * will go through the normal allocation free steps, which also includes an + * unmap of the permanent kernel mapping for those allocations. + * + * Due to these operations, the function is not IRQ-safe. However it is + * expected to execute quickly in the normal case, i.e. when the region holding + * the physical allocation is still present. + */ +void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, + struct kbase_vmap_struct *kern_mapping); + +/** + * kbase_get_cache_line_alignment - Return cache line alignment + * + * Helper function to return the maximum cache line alignment considering + * both CPU and GPU cache sizes. + * + * Return: CPU and GPU cache line alignment, in bytes. 
+ * + * @kbdev: Device pointer. + */ +u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); + #endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h index 6581ecfc95a0..70116030f233 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h @@ -74,6 +74,17 @@ static inline phys_addr_t as_phys_addr_t(struct tagged_addr t) return t.tagged_addr & PAGE_MASK; } +/** + * as_page - Retrieve the struct page from a tagged address + * @t: tagged address to be translated. + * + * Return: pointer to struct page corresponding to tagged address. + */ +static inline struct page *as_page(struct tagged_addr t) +{ + return phys_to_page(as_phys_addr_t(t)); +} + /** * as_tagged - Convert the physical address to tagged address type though * there is no tag info present, the lower order 12 bits will be 0 diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index 1255df0fc1ae..0f91be17a81b 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -414,9 +414,10 @@ void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) void kbase_mem_pool_term(struct kbase_mem_pool *pool) { struct kbase_mem_pool *next_pool = pool->next_pool; - struct page *p; + struct page *p, *tmp; size_t nr_to_spill = 0; LIST_HEAD(spill_list); + LIST_HEAD(free_list); int i; pool_dbg(pool, "terminate()\n"); @@ -434,7 +435,6 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Zero pages first without holding the next_pool lock */ for (i = 0; i < nr_to_spill; i++) { p = kbase_mem_pool_remove_locked(pool); - kbase_mem_pool_zero_page(pool, p); list_add(&p->lru, &spill_list); } } @@ -442,18 +442,26 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) while (!kbase_mem_pool_is_empty(pool)) { /* Free remaining pages to kernel */ p = kbase_mem_pool_remove_locked(pool); - kbase_mem_pool_free_page(pool, p); + list_add(&p->lru, &free_list); } kbase_mem_pool_unlock(pool); if (next_pool && nr_to_spill) { + list_for_each_entry(p, &spill_list, lru) + kbase_mem_pool_zero_page(pool, p); + /* Add new page list to next_pool */ kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); } + list_for_each_entry_safe(p, tmp, &free_list, lru) { + list_del_init(&p->lru); + kbase_mem_pool_free_page(pool, p); + } + pool_dbg(pool, "terminated\n"); } @@ -678,7 +686,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, continue; if (is_huge_head(pages[i]) || !is_huge(pages[i])) { - p = phys_to_page(as_phys_addr_t(pages[i])); + p = as_page(pages[i]); if (zero) kbase_mem_pool_zero_page(pool, p); else if (sync) @@ -720,7 +728,7 @@ static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, continue; if (is_huge_head(pages[i]) || !is_huge(pages[i])) { - p = phys_to_page(as_phys_addr_t(pages[i])); + p = as_page(pages[i]); if (zero) kbase_mem_pool_zero_page(pool, p); else if (sync) @@ -780,7 +788,7 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, continue; } - p = 
phys_to_page(as_phys_addr_t(pages[i])); + p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); pages[i] = as_tagged(0); @@ -824,7 +832,7 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, continue; } - p = phys_to_page(as_phys_addr_t(pages[i])); + p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); pages[i] = as_tagged(0); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h index 7f44d81e34e2..3c760717eef4 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h @@ -33,7 +33,7 @@ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT */ #define KBASE_MEM_PROFILE_MAX_BUF_SIZE \ - ((size_t) (64 + ((80 + (56 * 64)) * 31) + 56)) + ((size_t) (64 + ((80 + (56 * 64)) * 35) + 56)) #endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index a998930bfb98..84341ca18569 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #define KBASE_MMU_PAGE_ENTRIES 512 @@ -71,6 +71,19 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr, bool sync); +/** + * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches. + * @kbdev: Device pointer. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * @as_nr: GPU address space number for which flush + invalidate is required. + * + * This is used for MMU tables which do not belong to a user space context. + */ +static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr); + /** * kbase_mmu_sync_pgd - sync page directory to memory * @kbdev: Device pointer. 
@@ -101,8 +114,12 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, */ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str); + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault); +static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags); /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to @@ -120,7 +137,8 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, * * Return: the number of backed pages to increase by */ -static size_t reg_grow_calc_extra_pages(struct kbase_va_region *reg, size_t fault_rel_pfn) +static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, + struct kbase_va_region *reg, size_t fault_rel_pfn) { size_t multiple = reg->extent; size_t reg_current_size = kbase_reg_current_backed_size(reg); @@ -128,7 +146,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_va_region *reg, size_t faul size_t remainder; if (!multiple) { - dev_warn(reg->kctx->kbdev->dev, + dev_warn(kbdev->dev, "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; @@ -171,22 +189,22 @@ static size_t reg_grow_calc_extra_pages(struct kbase_va_region *reg, size_t faul return minimum_extra + multiple - remainder; } -#ifdef CONFIG_MALI_JOB_DUMP -static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_context *kctx, +#ifdef CONFIG_MALI_CINSTR_GWT +static void kbase_gpu_mmu_handle_write_faulting_as( struct kbase_device *kbdev, struct kbase_as *faulting_as, u64 start_pfn, size_t nr, u32 op) { mutex_lock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, start_pfn, + kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn, nr, op, 1); mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); } @@ -196,6 +214,7 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, struct kbasep_gwt_list_element *pos; struct kbase_va_region *region; struct kbase_device *kbdev; + struct kbase_fault *fault; u64 fault_pfn, pfn_offset; u32 op; int ret; @@ -203,24 +222,27 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; + fault = &faulting_as->pf_data; + fault_pfn = fault->addr >> PAGE_SHIFT; kbase_gpu_vm_lock(kctx); /* Find region and check if it should be writable. 
*/ region = kbase_region_tracker_find_region_enclosing_address(kctx, - faulting_as->fault_addr); + fault->addr); if (!region || region->flags & KBASE_REG_FREE) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not mapped on the GPU"); + "Memory is not mapped on the GPU", + &faulting_as->pf_data); return; } if (!(region->flags & KBASE_REG_GPU_WR)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Region does not have write permissions"); + "Region does not have write permissions", + &faulting_as->pf_data); return; } @@ -228,7 +250,7 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, * for job dumping if write tracking is enabled. */ if (kctx->gwt_enabled) { - u64 page_addr = faulting_as->fault_addr & PAGE_MASK; + u64 page_addr = fault->addr & PAGE_MASK; bool found = false; /* Check if this write was already handled. */ list_for_each_entry(pos, &kctx->gwt_current_list, link) { @@ -263,7 +285,7 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, else op = AS_COMMAND_FLUSH_PT; - kbase_gpu_mmu_handle_write_faulting_as(kctx, kbdev, faulting_as, + kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, op); kbase_gpu_vm_unlock(kctx); @@ -272,31 +294,226 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, struct kbase_as *faulting_as) { - u32 fault_status; - - fault_status = faulting_as->fault_status; + struct kbase_fault *fault = &faulting_as->pf_data; - switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + switch (fault->status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); break; case AS_FAULTSTATUS_ACCESS_TYPE_EX: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Execute Permission fault"); + "Execute Permission fault", fault); break; case AS_FAULTSTATUS_ACCESS_TYPE_READ: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Read Permission fault"); + "Read Permission fault", fault); break; default: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown Permission fault"); + "Unknown Permission fault", fault); break; } } #endif +#define MAX_POOL_LEVEL 2 + +/** + * page_fault_try_alloc - Try to allocate memory from a context pool + * @kctx: Context pointer + * @region: Region to grow + * @new_pages: Number of 4 kB pages to allocate + * @pages_to_grow: Pointer to variable to store number of outstanding pages on + * failure. This can be either 4 kB or 2 MB pages, depending on + * the number of pages requested. + * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true + * for 2 MB, false for 4 kB. + * @prealloc_sas: Pointer to kbase_sub_alloc structures + * + * This function will try to allocate as many pages as possible from the context + * pool, then if required will try to allocate the remaining pages from the + * device pool. + * + * This function will not allocate any new memory beyond that that is already + * present in the context or device pools. This is because it is intended to be + * called with the vm_lock held, which could cause recursive locking if the + * allocation caused the out-of-memory killer to run. + * + * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be + * a count of 2 MB pages, otherwise it will be a count of 4 kB pages. 
+ * + * Return: true if successful, false on failure + */ +static bool page_fault_try_alloc(struct kbase_context *kctx, + struct kbase_va_region *region, size_t new_pages, + int *pages_to_grow, bool *grow_2mb_pool, + struct kbase_sub_alloc **prealloc_sas) +{ + struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL}; + struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL}; + size_t pages_alloced[MAX_POOL_LEVEL] = {0}; + struct kbase_mem_pool *pool, *root_pool; + int pool_level = 0; + bool alloc_failed = false; + size_t pages_still_required; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (new_pages >= (SZ_2M / SZ_4K)) { + root_pool = &kctx->lp_mem_pool; + *grow_2mb_pool = true; + } else { +#endif + root_pool = &kctx->mem_pool; + *grow_2mb_pool = false; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (region->gpu_alloc != region->cpu_alloc) + new_pages *= 2; + + pages_still_required = new_pages; + + /* Determine how many pages are in the pools before trying to allocate. + * Don't attempt to allocate & free if the allocation can't succeed. + */ + for (pool = root_pool; pool != NULL; pool = pool->next_pool) { + size_t pool_size_4k; + + kbase_mem_pool_lock(pool); + + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + if (pool_size_4k >= pages_still_required) + pages_still_required = 0; + else + pages_still_required -= pool_size_4k; + + kbase_mem_pool_unlock(pool); + + if (!pages_still_required) + break; + } + + if (pages_still_required) { + /* Insufficient pages in pools. Don't try to allocate - just + * request a grow. + */ + *pages_to_grow = pages_still_required; + + return false; + } + + /* Since we've dropped the pool locks, the amount of memory in the pools + * may change between the above check and the actual allocation. + */ + pool = root_pool; + for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) { + size_t pool_size_4k; + size_t pages_to_alloc_4k; + size_t pages_to_alloc_4k_per_alloc; + + kbase_mem_pool_lock(pool); + + /* Allocate as much as possible from this pool*/ + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + pages_to_alloc_4k = MIN(new_pages, pool_size_4k); + if (region->gpu_alloc == region->cpu_alloc) + pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; + else + pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; + + pages_alloced[pool_level] = pages_to_alloc_4k; + if (pages_to_alloc_4k) { + gpu_pages[pool_level] = + kbase_alloc_phy_pages_helper_locked( + region->gpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[0]); + + if (!gpu_pages[pool_level]) { + alloc_failed = true; + } else if (region->gpu_alloc != region->cpu_alloc) { + cpu_pages[pool_level] = + kbase_alloc_phy_pages_helper_locked( + region->cpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[1]); + + if (!cpu_pages[pool_level]) + alloc_failed = true; + } + } + + kbase_mem_pool_unlock(pool); + + if (alloc_failed) { + WARN_ON(!new_pages); + WARN_ON(pages_to_alloc_4k >= new_pages); + WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages); + break; + } + + new_pages -= pages_to_alloc_4k; + + if (!new_pages) + break; + + pool = pool->next_pool; + if (!pool) + break; + } + + if (new_pages) { + /* Allocation was unsuccessful */ + int max_pool_level = pool_level; + + pool = root_pool; + + /* Free memory allocated so far */ + for (pool_level = 0; pool_level <= max_pool_level; + pool_level++) { + kbase_mem_pool_lock(pool); + + if (region->gpu_alloc != region->cpu_alloc) { + if (pages_alloced[pool_level] && + cpu_pages[pool_level]) + kbase_free_phy_pages_helper_locked( + 
region->cpu_alloc, + pool, cpu_pages[pool_level], + pages_alloced[pool_level]); + } + + if (pages_alloced[pool_level] && gpu_pages[pool_level]) + kbase_free_phy_pages_helper_locked( + region->gpu_alloc, + pool, gpu_pages[pool_level], + pages_alloced[pool_level]); + + kbase_mem_pool_unlock(pool); + + pool = pool->next_pool; + } + + /* + * If the allocation failed despite there being enough memory in + * the pool, then just fail. Otherwise, try to grow the memory + * pool. + */ + if (alloc_failed) + *pages_to_grow = 0; + else + *pages_to_grow = new_pages; + + return false; + } + + /* Allocation was successful. No pages to grow, return success. */ + *pages_to_grow = 0; + + return true; +} + void page_fault_worker(struct work_struct *data) { u64 fault_pfn; @@ -308,17 +525,17 @@ void page_fault_worker(struct work_struct *data) struct kbase_context *kctx; struct kbase_device *kbdev; struct kbase_va_region *region; + struct kbase_fault *fault; int err; bool grown = false; - size_t min_pool_size; - struct kbase_mem_pool *pool; int pages_to_grow; - struct tagged_addr *gpu_pages, *cpu_pages; + bool grow_2mb_pool; struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; faulting_as = container_of(data, struct kbase_as, work_pagefault); - fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; + fault = &faulting_as->pf_data; + fault_pfn = fault->addr >> PAGE_SHIFT; as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); @@ -334,16 +551,16 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); - if (unlikely(faulting_as->protected_mode)) { + if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Protected mode fault"); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + "Protected mode fault", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); goto fault_done; } - fault_status = faulting_as->fault_status; + fault_status = fault->status; switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: @@ -351,7 +568,7 @@ void page_fault_worker(struct work_struct *data) break; case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT /* If GWT was ever enabled then we need to handle * write fault pages even if the feature was disabled later. 
*/ @@ -363,12 +580,12 @@ void page_fault_worker(struct work_struct *data) #endif kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure"); + "Permission failure", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Translation table bus fault"); + "Translation table bus fault", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: @@ -379,61 +596,58 @@ void page_fault_worker(struct work_struct *data) case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault"); + "Address size fault", fault); else kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code"); + "Unknown fault code", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault"); + "Memory attributes fault", fault); else kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code"); + "Unknown fault code", fault); goto fault_done; default: kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code"); + "Unknown fault code", fault); goto fault_done; } -page_fault_retry: #ifdef CONFIG_MALI_2MB_ALLOC /* Preallocate memory for the sub-allocation structs if necessary */ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); if (!prealloc_sas[i]) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), - GFP_KERNEL); - if (!prealloc_sas[i]) { - kbase_mmu_report_fault_and_kill( - kctx, faulting_as, - "Failed pre-allocating memory for sub-allocations' metadata"); - goto fault_done; - } + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Failed pre-allocating memory for sub-allocations' metadata", + fault); + goto fault_done; } } #endif /* CONFIG_MALI_2MB_ALLOC */ +page_fault_retry: /* so we have a translation fault, let's see if it is for growable * memory */ kbase_gpu_vm_lock(kctx); region = kbase_region_tracker_find_region_enclosing_address(kctx, - faulting_as->fault_addr); + fault->addr); if (!region || region->flags & KBASE_REG_FREE) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not mapped on the GPU"); + "Memory is not mapped on the GPU", fault); goto fault_done; } if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "DMA-BUF is not mapped on the GPU"); + "DMA-BUF is not mapped on the GPU", fault); goto fault_done; } @@ -441,14 +655,14 @@ page_fault_retry: != GROWABLE_FLAGS_REQUIRED) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not growable"); + "Memory is not growable", fault); goto fault_done; } if ((region->flags & KBASE_REG_DONT_NEED)) { kbase_gpu_vm_unlock(kctx); kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Don't need memory can't be grown"); + "Don't need memory can't be grown", fault); goto fault_done; } @@ -459,13 +673,13 @@ page_fault_retry: if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", - faulting_as->fault_addr, region->start_pfn, + fault->addr, region->start_pfn, region->start_pfn + 
kbase_reg_current_backed_size(region)); mutex_lock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); /* [1] in case another page fault occurred while we were * handling the (duplicate) page fault we need to ensure we @@ -475,19 +689,19 @@ page_fault_retry: * transaction (which should cause the other page fault to be * raised again). */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, + kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, AS_COMMAND_UNLOCK, 1); mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); goto fault_done; } - new_pages = reg_grow_calc_extra_pages(region, fault_rel_pfn); + new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); /* cap to max vsize */ new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region)); @@ -496,74 +710,26 @@ page_fault_retry: mutex_lock(&kbdev->mmu_hw_mutex); /* Duplicate of a fault we've already handled, nothing to do */ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); /* See comment [1] about UNLOCK usage */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, + kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, AS_COMMAND_UNLOCK, 1); mutex_unlock(&kbdev->mmu_hw_mutex); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); kbase_gpu_vm_unlock(kctx); goto fault_done; } -#ifdef CONFIG_MALI_2MB_ALLOC - if (new_pages >= (SZ_2M / SZ_4K)) { - pool = &kctx->lp_mem_pool; - /* Round up to number of 2 MB pages required */ - min_pool_size = new_pages + ((SZ_2M / SZ_4K) - 1); - min_pool_size /= (SZ_2M / SZ_4K); - } else { -#endif - pool = &kctx->mem_pool; - min_pool_size = new_pages; -#ifdef CONFIG_MALI_2MB_ALLOC - } -#endif - - if (region->gpu_alloc != region->cpu_alloc) - min_pool_size *= 2; - pages_to_grow = 0; - mutex_lock(&kctx->mem_partials_lock); - kbase_mem_pool_lock(pool); - /* We can not allocate memory from the kernel with the vm_lock held, so - * check that there is enough memory in the pool. If not then calculate - * how much it has to grow by, grow the pool when the vm_lock is - * dropped, and retry the allocation. - */ - if (kbase_mem_pool_size(pool) >= min_pool_size) { - gpu_pages = kbase_alloc_phy_pages_helper_locked( - region->gpu_alloc, pool, new_pages, - &prealloc_sas[0]); - - if (gpu_pages) { - if (region->gpu_alloc != region->cpu_alloc) { - cpu_pages = kbase_alloc_phy_pages_helper_locked( - region->cpu_alloc, pool, - new_pages, &prealloc_sas[1]); - - if (cpu_pages) { - grown = true; - } else { - kbase_free_phy_pages_helper_locked( - region->gpu_alloc, - pool, gpu_pages, - new_pages); - } - } else { - grown = true; - } - } - } else { - pages_to_grow = min_pool_size - kbase_mem_pool_size(pool); - } - kbase_mem_pool_unlock(pool); - mutex_unlock(&kctx->mem_partials_lock); + spin_lock(&kctx->mem_partials_lock); + grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, + &grow_2mb_pool, prealloc_sas); + spin_unlock(&kctx->mem_partials_lock); if (grown) { u64 pfn_offset; @@ -582,7 +748,7 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. 
*/ - err = kbase_mmu_insert_pages_no_flush(kctx, + err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags); @@ -594,7 +760,7 @@ page_fault_retry: kbase_gpu_vm_unlock(kctx); /* The locked VA region will be unlocked and the cache invalidated in here */ kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Page table update failure"); + "Page table update failure", fault); goto fault_done; } #if defined(CONFIG_MALI_GATOR_SUPPORT) @@ -619,22 +785,21 @@ page_fault_retry: * this stage a new IRQ might not be raised when the GPU finds * a MMU IRQ is already pending. */ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, - faulting_as->fault_addr >> PAGE_SHIFT, - new_pages, - op, 1); + kbase_mmu_hw_do_operation(kbdev, faulting_as, + fault->addr >> PAGE_SHIFT, + new_pages, op, 1); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ /* reenable this in the mask */ - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); -#ifdef CONFIG_MALI_JOB_DUMP +#ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) { /* GWT also tracks growable regions. */ struct kbasep_gwt_list_element *pos; @@ -662,13 +827,27 @@ page_fault_retry: /* If the memory pool was insufficient then grow it and retry. * Otherwise fail the allocation. */ - if (pages_to_grow > 0) - ret = kbase_mem_pool_grow(pool, pages_to_grow); - + if (pages_to_grow > 0) { +#ifdef CONFIG_MALI_2MB_ALLOC + if (grow_2mb_pool) { + /* Round page requirement up to nearest 2 MB */ + pages_to_grow = (pages_to_grow + + ((1 << kctx->lp_mem_pool.order) - 1)) + >> kctx->lp_mem_pool.order; + ret = kbase_mem_pool_grow(&kctx->lp_mem_pool, + pages_to_grow); + } else { +#endif + ret = kbase_mem_pool_grow(&kctx->mem_pool, + pages_to_grow); +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + } if (ret < 0) { /* failed to extend, handle as a normal PF */ kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Page allocation failure"); + "Page allocation failure", fault); } else { goto page_fault_retry; } @@ -687,54 +866,57 @@ fault_done: atomic_dec(&kbdev->faults_pending); } -phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) +static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut) { u64 *page; int i; struct page *p; - int new_page_count __maybe_unused; - - KBASE_DEBUG_ASSERT(NULL != kctx); - new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages); - kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); - p = kbase_mem_pool_alloc(&kctx->mem_pool); + p = kbase_mem_pool_alloc(&kbdev->mem_pool); if (!p) - goto sub_pages; - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); + return 0; page = kmap(p); if (NULL == page) goto alloc_free; - kbase_process_page_usage_inc(kctx, 1); + /* If the MMU tables belong to a context then account the memory usage + * to that context, otherwise the MMU tables are device wide and are + * only accounted to the device. 
+ */ + if (mmut->kctx) { + int new_page_count; + + new_page_count = kbase_atomic_add_pages(1, + &mmut->kctx->used_pages); + KBASE_TLSTREAM_AUX_PAGESALLOC( + mmut->kctx->id, + (u64)new_page_count); + kbase_process_page_usage_inc(mmut->kctx, 1); + } + + kbase_atomic_add_pages(1, &kbdev->memdev.used_pages); for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) - kctx->kbdev->mmu_mode->entry_invalidate(&page[i]); + kbdev->mmu_mode->entry_invalidate(&page[i]); - kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap(p); return page_to_phys(p); alloc_free: - kbase_mem_pool_free(&kctx->mem_pool, p, false); -sub_pages: - kbase_atomic_sub_pages(1, &kctx->used_pages); - kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + kbase_mem_pool_free(&kbdev->mem_pool, p, false); return 0; } -KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd); - /* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the * new table from the pool if needed and possible */ -static int mmu_get_next_pgd(struct kbase_context *kctx, +static int mmu_get_next_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t *pgd, u64 vpfn, int level) { u64 *page; @@ -742,9 +924,8 @@ static int mmu_get_next_pgd(struct kbase_context *kctx, struct page *p; KBASE_DEBUG_ASSERT(*pgd); - KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&mmut->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -756,23 +937,24 @@ static int mmu_get_next_pgd(struct kbase_context *kctx, p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (NULL == page) { - dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); return -EINVAL; } - target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); if (!target_pgd) { - target_pgd = kbase_mmu_alloc_pgd(kctx); + target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (!target_pgd) { - dev_dbg(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", + __func__); kunmap(p); return -ENOMEM; } - kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); + kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); - kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); /* Rely on the caller to update the address space flags. 
*/ } @@ -785,7 +967,8 @@ static int mmu_get_next_pgd(struct kbase_context *kctx, /* * Returns the PGD for the specified level of translation */ -static int mmu_get_pgd_at_level(struct kbase_context *kctx, +static int mmu_get_pgd_at_level(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, unsigned int level, phys_addr_t *out_pgd) @@ -793,14 +976,14 @@ static int mmu_get_pgd_at_level(struct kbase_context *kctx, phys_addr_t pgd; int l; - lockdep_assert_held(&kctx->mmu_lock); - pgd = kctx->pgd; + lockdep_assert_held(&mmut->mmu_lock); + pgd = mmut->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = mmu_get_next_pgd(kctx, &pgd, vpfn, l); + int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); /* Handle failure condition */ if (err) { - dev_dbg(kctx->kbdev->dev, + dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d\n", __func__, l); return err; @@ -812,27 +995,30 @@ static int mmu_get_pgd_at_level(struct kbase_context *kctx, return 0; } -#define mmu_get_bottom_pgd(kctx, vpfn, out_pgd) \ - mmu_get_pgd_at_level((kctx), (vpfn), MIDGARD_MMU_BOTTOMLEVEL, (out_pgd)) - +static int mmu_get_bottom_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 vpfn, + phys_addr_t *out_pgd) +{ + return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, + out_pgd); +} -static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, - u64 from_vpfn, u64 to_vpfn) +static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 from_vpfn, u64 to_vpfn) { phys_addr_t pgd; u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); - lockdep_assert_held(&kctx->mmu_lock); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&mmut->mmu_lock); - mmu_mode = kctx->kbdev->mmu_mode; + mmu_mode = kbdev->mmu_mode; while (vpfn < to_vpfn) { unsigned int i; @@ -847,7 +1033,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, count = left; /* need to check if this is a 2MB page or a 4kB */ - pgd = kctx->pgd; + pgd = mmut->pgd; for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { @@ -869,7 +1055,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, pcount = count; break; default: - dev_warn(kctx->kbdev->dev, "%sNo support for ATEs at level %d\n", + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", __func__, level); goto next; } @@ -878,7 +1064,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, for (i = 0; i < pcount; i++) mmu_mode->entry_invalidate(&page[idx + i]); - kbase_mmu_sync_pgd(kctx->kbdev, + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, 8 * pcount); kunmap(phys_to_page(pgd)); @@ -907,7 +1093,6 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); @@ -917,7 +1102,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; - mutex_lock(&kctx->mmu_lock); + mutex_lock(&kctx->mmu.mmu_lock); while (remain) { unsigned int i; @@ -936,27 +1121,27 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, 
u64 vpfn, * 256 pages at once (on average). Do we really care? */ do { - err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); + err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu, + vpfn, &pgd); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for * the page walk to succeed */ - mutex_unlock(&kctx->mmu_lock); - err = kbase_mem_pool_grow(&kctx->mem_pool, + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool, MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu_lock); + mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); if (recover_required) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kctx, - recover_vpfn, - recover_vpfn + - recover_count - ); + mmu_insert_pages_failure_recovery(kctx->kbdev, + &kctx->mmu, + recover_vpfn, + recover_vpfn + recover_count); } goto fail_unlock; } @@ -968,11 +1153,10 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (recover_required) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kctx, - recover_vpfn, - recover_vpfn + - recover_count - ); + mmu_insert_pages_failure_recovery(kctx->kbdev, + &kctx->mmu, + recover_vpfn, + recover_vpfn + recover_count); } err = -ENOMEM; goto fail_unlock; @@ -1003,30 +1187,38 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&kctx->mmu.mmu_lock); kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); return 0; fail_unlock: - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&kctx->mmu.mmu_lock); kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); return err; } -static inline void cleanup_empty_pte(struct kbase_context *kctx, u64 *pte) +static inline void cleanup_empty_pte(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 *pte) { phys_addr_t tmp_pgd; struct page *tmp_p; - tmp_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(*pte); + tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte); tmp_p = phys_to_page(tmp_pgd); - kbase_mem_pool_free(&kctx->mem_pool, tmp_p, false); - kbase_process_page_usage_dec(kctx, 1); - kbase_atomic_sub_pages(1, &kctx->used_pages); - kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false); + + /* If the MMU tables belong to a context then we accounted the memory + * usage to that context, so decrement here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + kbase_atomic_sub_pages(1, &mmut->kctx->used_pages); + } + kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages); } -int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags) @@ -1038,18 +1230,17 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, int err; struct kbase_mmu_mode const *mmu_mode; - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(start_vpfn); + /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); - mmu_mode = kctx->kbdev->mmu_mode; + mmu_mode = kbdev->mmu_mode; /* Early out if there is nothing to do */ if (nr == 0) return 0; - mutex_lock(&kctx->mmu_lock); + mutex_lock(&mmut->mmu_lock); while (remain) { unsigned int i; @@ -1074,28 +1265,27 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, * 256 pages at once (on average). Do we really care? */ do { - err = mmu_get_pgd_at_level(kctx, insert_vpfn, cur_level, - &pgd); + err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, + cur_level, &pgd); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for * the page walk to succeed */ - mutex_unlock(&kctx->mmu_lock); - err = kbase_mem_pool_grow(&kctx->mem_pool, + mutex_unlock(&mmut->mmu_lock); + err = kbase_mem_pool_grow(&kbdev->mem_pool, cur_level); - mutex_lock(&kctx->mmu_lock); + mutex_lock(&mmut->mmu_lock); } while (!err); if (err) { - dev_warn(kctx->kbdev->dev, + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kctx, - start_vpfn, - insert_vpfn); + mmu_insert_pages_failure_recovery(kbdev, + mmut, start_vpfn, insert_vpfn); } goto fail_unlock; } @@ -1103,14 +1293,13 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kctx->kbdev->dev, "%s: kmap failure\n", + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ - mmu_insert_pages_failure_recovery(kctx, - start_vpfn, - insert_vpfn); + mmu_insert_pages_failure_recovery(kbdev, + mmut, start_vpfn, insert_vpfn); } err = -ENOMEM; goto fail_unlock; @@ -1121,7 +1310,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 *target = &pgd_page[level_index]; if (mmu_mode->pte_is_valid(*target, cur_level)) - cleanup_empty_pte(kctx, target); + cleanup_empty_pte(kbdev, mmut, target); mmu_mode->entry_set_ate(target, *phys, flags, cur_level); } else { @@ -1129,18 +1318,16 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, unsigned int ofs = vindex + i; u64 *target = &pgd_page[ofs]; - /* Fail if the current page is a valid ATE entry - * unless gwt_was_enabled as in that case all - * pages will be valid from when - * kbase_gpu_gwt_start() cleared the gpu - * write flag. + /* Warn if the current page is a valid ATE + * entry. The page table shouldn't have anything + * in the place where we are trying to put a + * new entry. 
Modification to page table entries + * should be performed with + * kbase_mmu_update_pages() */ -#ifdef CONFIG_MALI_JOB_DUMP - if (!kctx->gwt_was_enabled) -#endif - KBASE_DEBUG_ASSERT - (0 == (*target & 1UL)); - kctx->kbdev->mmu_mode->entry_set_ate(target, + WARN_ON((*target & 1UL) != 0); + + kbdev->mmu_mode->entry_set_ate(target, phys[i], flags, cur_level); } } @@ -1149,32 +1336,39 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, insert_vpfn += count; remain -= count; - kbase_mmu_sync_pgd(kctx->kbdev, + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64)); kunmap(p); } - mutex_unlock(&kctx->mmu_lock); - return 0; + err = 0; fail_unlock: - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&mmut->mmu_lock); return err; } /* - * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space + * number 'as_nr'. */ -int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags) +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr) { int err; - err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, + phys, nr, flags); + + if (mmut->kctx) + kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); + else + kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr); + return err; } @@ -1209,8 +1403,7 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr], - kctx, vpfn, nr, op, 0); -#if KBASE_GPU_RESET_EN + vpfn, nr, op, 0); if (err) { /* Flush failed to complete, assume the * GPU has hung and perform a reset to @@ -1220,7 +1413,6 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, if (kbase_prepare_to_reset_gpu_locked(kbdev)) kbase_reset_gpu_locked(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ #ifndef CONFIG_MALI_NO_MALI /* @@ -1234,14 +1426,81 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, #endif /* !CONFIG_MALI_NO_MALI */ } +/* Perform a flush/invalidate on a particular address space + */ +static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, + struct kbase_as *as, + u64 vpfn, size_t nr, bool sync, bool drain_pending) +{ + int err; + u32 op; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* GPU is off so there's no need to perform flush/invalidate */ + return; + } + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + as, vpfn, nr, op, 0); + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + +#ifndef CONFIG_MALI_NO_MALI + /* + * The transaction lock must be dropped before here + * as kbase_wait_write_flush could take it if + * the GPU was powered down (static analysis doesn't + * know this can't happen). 
+ */ + drain_pending |= (!err) && sync && + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367); + if (drain_pending) { + /* Wait for GPU to flush write buffer */ + kbase_wait_write_flush(kbdev); + } +#endif /* !CONFIG_MALI_NO_MALI */ + + kbase_pm_context_idle(kbdev); +} + +static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr) +{ + /* Skip if there is nothing to do */ + if (nr) { + kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn, + nr, sync, false); + } +} + static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr, bool sync) { struct kbase_device *kbdev; bool ctx_is_in_runpool; -#ifndef CONFIG_MALI_NO_MALI bool drain_pending = false; +#ifndef CONFIG_MALI_NO_MALI if (atomic_xchg(&kctx->drain_pending, 0)) drain_pending = true; #endif /* !CONFIG_MALI_NO_MALI */ @@ -1258,71 +1517,22 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, if (ctx_is_in_runpool) { KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - if (!kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - int err; - u32 op; - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; + kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], + vpfn, nr, sync, drain_pending); - err = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - kctx, vpfn, nr, op, 0); - -#if KBASE_GPU_RESET_EN - if (err) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - } -#endif /* KBASE_GPU_RESET_EN */ - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - -#ifndef CONFIG_MALI_NO_MALI - /* - * The transaction lock must be dropped before here - * as kbase_wait_write_flush could take it if - * the GPU was powered down (static analysis doesn't - * know this can't happen). - */ - drain_pending |= (!err) && sync && - kbase_hw_has_issue(kctx->kbdev, - BASE_HW_ISSUE_6367); - if (drain_pending) { - /* Wait for GPU to flush write buffer */ - kbase_wait_write_flush(kctx); - } -#endif /* !CONFIG_MALI_NO_MALI */ - - kbase_pm_context_idle(kbdev); - } kbasep_js_runpool_release_ctx(kbdev, kctx); } } -void kbase_mmu_update(struct kbase_context *kctx) +void kbase_mmu_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr) { - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the hwaccess_lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); - kctx->kbdev->mmu_mode->update(kctx); + kbdev->mmu_mode->update(kbdev, mmut, as_nr); } KBASE_EXPORT_TEST_API(kbase_mmu_update); @@ -1369,24 +1579,22 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more * information. 
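/*
 * Hypothetical call sites (not from this patch) for the reworked interface:
 * MMU operations now take the device and a struct kbase_mmu_table plus an
 * explicit address space number instead of a kbase_context.  For a
 * context-owned table the natural arguments are kctx->kbdev, &kctx->mmu and
 * kctx->as_nr; vpfn, phys, nr and flags below are placeholders.
 */
err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, vpfn, phys, nr,
			     flags, kctx->as_nr);

/* Caller holds kbdev->hwaccess_lock and kbdev->mmu_hw_mutex, and kctx is
 * currently scheduled into address space kctx->as_nr.
 */
kbase_mmu_update(kctx->kbdev, &kctx->mmu, kctx->as_nr);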
*/ -int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) { phys_addr_t pgd; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; int err = -EFAULT; - KBASE_DEBUG_ASSERT(NULL != kctx); - beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); - if (0 == nr) { /* early out if nothing to do */ return 0; } - mutex_lock(&kctx->mmu_lock); + mutex_lock(&mmut->mmu_lock); - mmu_mode = kctx->kbdev->mmu_mode; + mmu_mode = kbdev->mmu_mode; while (nr) { unsigned int i; @@ -1400,7 +1608,7 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) count = nr; /* need to check if this is a 2MB or a 4kB page */ - pgd = kctx->pgd; + pgd = mmut->pgd; for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { @@ -1438,7 +1646,7 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) switch (level) { case MIDGARD_MMU_LEVEL(0): case MIDGARD_MMU_LEVEL(1): - dev_warn(kctx->kbdev->dev, + dev_warn(kbdev->dev, "%s: No support for ATEs at level %d\n", __func__, level); kunmap(phys_to_page(pgd)); @@ -1448,7 +1656,7 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) if (count >= 512) { pcount = 1; } else { - dev_warn(kctx->kbdev->dev, + dev_warn(kbdev->dev, "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", __func__, count); pcount = 0; @@ -1459,7 +1667,7 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) pcount = count; break; default: - dev_err(kctx->kbdev->dev, + dev_err(kbdev->dev, "%s: found non-mapped memory, early out\n", __func__); vpfn += count; @@ -1471,7 +1679,7 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) for (i = 0; i < pcount; i++) mmu_mode->entry_invalidate(&page[index + i]); - kbase_mmu_sync_pgd(kctx->kbdev, + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(phys_to_page(pgd)) + 8 * index, 8*pcount); @@ -1482,26 +1690,35 @@ next: } err = 0; out: - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); + mutex_unlock(&mmut->mmu_lock); + + if (mmut->kctx) + kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true); + else + kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); /** - * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'. - * This call is being triggered as a response to the changes of the mem attributes + * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU * - * @pre : The caller is responsible for validating the memory attributes + * This will update page table entries that already exist on the GPU based on + * the new flags that are passed. It is used as a response to the changes of + * the memory attributes * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. 
+ * The caller is responsible for validating the memory attributes + * + * @kctx: Kbase context + * @vpfn: Virtual PFN (Page Frame Number) of the first page to update + * @phys: Tagged physical addresses of the physical pages to replace the + * current mappings + * @nr: Number of pages to update + * @flags: Flags */ -int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, +static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags) { @@ -1511,14 +1728,13 @@ int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, int err; KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(0 != vpfn); KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); /* Early out if there is nothing to do */ if (nr == 0) return 0; - mutex_lock(&kctx->mmu_lock); + mutex_lock(&kctx->mmu.mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1532,16 +1748,17 @@ int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, count = nr; do { - err = mmu_get_bottom_pgd(kctx, vpfn, &pgd); + err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu, + vpfn, &pgd); if (err != -ENOMEM) break; /* Fill the memory pool with enough pages for * the page walk to succeed */ - mutex_unlock(&kctx->mmu_lock); - err = kbase_mem_pool_grow(&kctx->mem_pool, + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool, MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu_lock); + mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { dev_warn(kctx->kbdev->dev, @@ -1572,11 +1789,11 @@ int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, kunmap(pfn_to_page(PFN_DOWN(pgd))); } - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&kctx->mmu.mmu_lock); return 0; fail_unlock: - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&kctx->mmu.mmu_lock); return err; } @@ -1591,8 +1808,9 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, return err; } -static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, - int level, u64 *pgd_page_buffer) +static void mmu_teardown_level(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t pgd, + int level, u64 *pgd_page_buffer) { phys_addr_t target_pgd; struct page *p; @@ -1600,9 +1818,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int i; struct kbase_mmu_mode const *mmu_mode; - KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&mmut->mmu_lock); pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. 
*/ @@ -1613,14 +1829,14 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; - mmu_mode = kctx->kbdev->mmu_mode; + mmu_mode = kbdev->mmu_mode; for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); if (target_pgd) { if (mmu_mode->pte_is_valid(pgd_page[i], level)) { - mmu_teardown_level(kctx, + mmu_teardown_level(kbdev, mmut, target_pgd, level + 1, pgd_page_buffer + @@ -1630,56 +1846,69 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, } p = pfn_to_page(PFN_DOWN(pgd)); - kbase_mem_pool_free(&kctx->mem_pool, p, true); - kbase_process_page_usage_dec(kctx, 1); - kbase_atomic_sub_pages(1, &kctx->used_pages); - kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + kbase_mem_pool_free(&kbdev->mem_pool, p, true); + kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. + */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + kbase_atomic_sub_pages(1, &mmut->kctx->used_pages); + } } -int kbase_mmu_init(struct kbase_context *kctx) +int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct kbase_context *kctx) { - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); - - mutex_init(&kctx->mmu_lock); + mutex_init(&mmut->mmu_lock); + mmut->kctx = kctx; /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ - kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); - if (NULL == kctx->mmu_teardown_pages) + if (mmut->mmu_teardown_pages == NULL) return -ENOMEM; - return 0; -} + mmut->pgd = 0; + /* We allocate pages into the kbdev memory pool, then + * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to + * avoid allocations from the kernel happening with the lock held. 
+ */ + while (!mmut->pgd) { + int err; -void kbase_mmu_term(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + err = kbase_mem_pool_grow(&kbdev->mem_pool, + MIDGARD_MMU_BOTTOMLEVEL); + if (err) { + kbase_mmu_term(kbdev, mmut); + return -ENOMEM; + } + + mutex_lock(&mmut->mmu_lock); + mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); + } - kfree(kctx->mmu_teardown_pages); - kctx->mmu_teardown_pages = NULL; + return 0; } -void kbase_mmu_free_pgd(struct kbase_context *kctx) +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { - int new_page_count = 0; - - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); - - mutex_lock(&kctx->mmu_lock); - mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, - kctx->mmu_teardown_pages); - mutex_unlock(&kctx->mmu_lock); + if (mmut->pgd) { + mutex_lock(&mmut->mmu_lock); + mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL, + mmut->mmu_teardown_pages); + mutex_unlock(&mmut->mmu_lock); + + if (mmut->kctx) + KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0); + } - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); + kfree(mmut->mmu_teardown_pages); + mutex_destroy(&mmut->mmu_lock); } -KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); - static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) { phys_addr_t target_pgd; @@ -1690,7 +1919,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->mmu.mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1755,7 +1984,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) KBASE_DEBUG_ASSERT(0 != size_left); kaddr = vmalloc_user(size_left); - mutex_lock(&kctx->mmu_lock); + mutex_lock(&kctx->mmu.mmu_lock); if (kaddr) { u64 end_marker = 0xFFULL; @@ -1770,7 +1999,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) if (kctx->api_version >= KBASE_API_VERSION(8, 4)) { struct kbase_mmu_setup as_setup; - kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); + kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, + &as_setup); config[0] = as_setup.transtab; config[1] = as_setup.memattr; config[2] = as_setup.transcfg; @@ -1781,7 +2011,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) } dump_size = kbasep_mmu_dump_level(kctx, - kctx->pgd, + kctx->mmu.pgd, MIDGARD_MMU_TOPLEVEL, &mmu_dump_buffer, &size_left); @@ -1803,12 +2033,12 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); } - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&kctx->mmu.mmu_lock); return kaddr; fail_free: vfree(kaddr); - mutex_unlock(&kctx->mmu_lock); + mutex_unlock(&kctx->mmu.mmu_lock); return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); @@ -1819,11 +2049,14 @@ void bus_fault_worker(struct work_struct *data) int as_no; struct kbase_context *kctx; struct kbase_device *kbdev; -#if KBASE_GPU_RESET_EN + struct kbase_fault *fault; bool reset_status = false; -#endif /* KBASE_GPU_RESET_EN */ faulting_as = container_of(data, struct kbase_as, work_busfault); + fault = &faulting_as->bf_data; + + /* Ensure that any pending page fault worker has completed */ + flush_work(&faulting_as->work_pagefault); as_no = faulting_as->number; @@ -1838,10 
+2071,10 @@ void bus_fault_worker(struct work_struct *data) return; } - if (unlikely(faulting_as->protected_mode)) { + if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure"); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + "Permission failure", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); kbasep_js_runpool_release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); @@ -1849,7 +2082,6 @@ void bus_fault_worker(struct work_struct *data) } -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. * We start the reset before switching to UNMAPPED to ensure that unrelated jobs @@ -1858,7 +2090,6 @@ void bus_fault_worker(struct work_struct *data) dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n"); reset_status = kbase_prepare_to_reset_gpu(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { unsigned long flags; @@ -1875,18 +2106,16 @@ void bus_fault_worker(struct work_struct *data) mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); kbase_pm_context_idle(kbdev); } -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) kbase_reset_gpu(kbdev); -#endif /* KBASE_GPU_RESET_EN */ kbasep_js_runpool_release_ctx(kbdev, kctx); @@ -2086,7 +2315,8 @@ static const char *access_type_name(struct kbase_device *kbdev, * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on. */ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str) + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault) { unsigned long flags; int exception_type; @@ -2096,9 +2326,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; -#if KBASE_GPU_RESET_EN bool reset_status = false; -#endif as_no = as->number; kbdev = kctx->kbdev; @@ -2108,9 +2336,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); /* decode the fault status */ - exception_type = as->fault_status & 0xFF; - access_type = (as->fault_status >> 8) & 0x3; - source_id = (as->fault_status >> 16); + exception_type = fault->status & 0xFF; + access_type = (fault->status >> 8) & 0x3; + source_id = (fault->status >> 16); /* terminal fault, print info about the fault */ dev_err(kbdev->dev, @@ -2122,12 +2350,12 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, as->fault_addr, + as_no, fault->addr, reason_str, - as->fault_status, - (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), + fault->status, + (fault->status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), exception_type, kbase_exception_name(kbdev, exception_type), - access_type, access_type_name(kbdev, as->fault_status), + access_type, access_type_name(kbdev, fault->status), source_id, kctx->pid); @@ -2135,11 +2363,9 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING)) { - unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; - - if ((as->fault_addr >= kbdev->hwcnt.addr) && - (as->fault_addr < (kbdev->hwcnt.addr + - (num_core_groups * 2048)))) + if ((fault->addr >= kbdev->hwcnt.addr) && + (fault->addr < (kbdev->hwcnt.addr + + kbdev->hwcnt.addr_bytes))) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; } @@ -2154,7 +2380,6 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, kbase_backend_jm_kill_jobs_from_kctx(kctx); /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. * We start the reset before switching to UNMAPPED to ensure that unrelated jobs @@ -2163,7 +2388,6 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery."); reset_status = kbase_prepare_to_reset_gpu(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); @@ -2172,22 +2396,19 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ /* Clear down the fault */ - kbase_mmu_hw_clear_fault(kbdev, as, kctx, + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, kctx, + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); -#if KBASE_GPU_RESET_EN if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) kbase_reset_gpu(kbdev); -#endif /* KBASE_GPU_RESET_EN */ } void kbasep_as_do_poke(struct work_struct *work) { struct kbase_as *as; struct kbase_device *kbdev; - struct kbase_context *kctx; unsigned long flags; KBASE_DEBUG_ASSERT(work); @@ -2203,12 +2424,11 @@ void kbasep_as_do_poke(struct work_struct *work) * the AS will not be released as before the atom is released this workqueue * is flushed (in kbase_as_poking_timer_release_atom) */ - kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number); /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); /* Force a uTLB invalidate */ - kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0, + kbase_mmu_hw_do_operation(kbdev, as, 0, 0, AS_COMMAND_UNLOCK, 0); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ @@ -2341,33 +2561,35 @@ void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase katom->poking = 0; } -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as) +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); if (!kctx) { - dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no 
context present! Suprious IRQ or SW Design Error?\n", - kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault", - as->number, as->fault_addr); + dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", + kbase_as_has_bus_fault(as) ? + "Bus error" : "Page fault", + as->number, fault->addr); /* Since no ctx was found, the MMU must be disabled. */ WARN_ON(as->current_setup.transtab); if (kbase_as_has_bus_fault(as)) { - kbase_mmu_hw_clear_fault(kbdev, as, kctx, + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, kctx, + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); } else if (kbase_as_has_page_fault(as)) { - kbase_mmu_hw_clear_fault(kbdev, as, kctx, + kbase_mmu_hw_clear_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, kctx, + kbase_mmu_hw_enable_fault(kbdev, as, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } -#if KBASE_GPU_RESET_EN if (kbase_as_has_bus_fault(as) && kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { bool reset_status; @@ -2381,7 +2603,6 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex if (reset_status) kbase_reset_gpu_locked(kbdev); } -#endif /* KBASE_GPU_RESET_EN */ return; } @@ -2407,11 +2628,11 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) dev_warn(kbdev->dev, "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, as->fault_addr, - as->fault_extra_addr); + as->number, fault->addr, + fault->extra_addr); else dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, as->fault_addr); + as->number, fault->addr); /* * We need to switch to UNMAPPED mode - but we do this in a diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h index 92aa55dc2b35..70d5f2becc71 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -70,10 +70,9 @@ enum kbase_mmu_fault_type { * * @param[in] kbdev kbase device to configure. * @param[in] as address space to configure. - * @param[in] kctx kbase context to configure. */ void kbase_mmu_hw_configure(struct kbase_device *kbdev, - struct kbase_as *as, struct kbase_context *kctx); + struct kbase_as *as); /** @brief Issue an operation to the MMU. * @@ -82,7 +81,6 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * * @param[in] kbdev kbase device to issue the MMU operation on. * @param[in] as address space to issue the MMU operation on. - * @param[in] kctx kbase context to issue the MMU operation on. * @param[in] vpfn MMU Virtual Page Frame Number to start the * operation on. * @param[in] nr Number of pages to work on. @@ -93,7 +91,7 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @return Zero if the operation was successful, non-zero otherwise. 
*/ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type, + u64 vpfn, u32 nr, u32 type, unsigned int handling_irq); /** @brief Clear a fault that has been previously reported by the MMU. @@ -102,11 +100,10 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, * * @param[in] kbdev kbase device to clear the fault from. * @param[in] as address space to clear the fault from. - * @param[in] kctx kbase context to clear the fault from or NULL. * @param[in] type The type of fault that needs to be cleared. */ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx, enum kbase_mmu_fault_type type); + enum kbase_mmu_fault_type type); /** @brief Enable fault that has been previously reported by the MMU. * @@ -116,11 +113,10 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, * * @param[in] kbdev kbase device to again enable the fault from. * @param[in] as address space to again enable the fault from. - * @param[in] kctx kbase context to again enable the fault from. * @param[in] type The type of fault that needs to be enabled again. */ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, - struct kbase_context *kctx, enum kbase_mmu_fault_type type); + enum kbase_mmu_fault_type type); /** @} *//* end group mali_kbase_mmu_hw */ /** @} *//* end group base_kbase_api */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c index aa0c4038b563..38ca456477cc 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2014, 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -68,7 +68,7 @@ static inline void page_table_entry_set(u64 *pte, u64 phy) #endif } -static void mmu_get_as_setup(struct kbase_context *kctx, +static void mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup * const setup) { /* Set up the required caching policies at the correct indices @@ -84,22 +84,30 @@ static void mmu_get_as_setup(struct kbase_context *kctx, (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | (AS_MEMATTR_AARCH64_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)); + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | + (AS_MEMATTR_AARCH64_NON_CACHEABLE << + (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); - setup->transtab = (u64)kctx->pgd & AS_TRANSTAB_BASE_MASK; + setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; } -static void mmu_update(struct kbase_context *kctx) +static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int as_nr) { - struct kbase_device * const kbdev = kctx->kbdev; - struct kbase_as * const as = &kbdev->as[kctx->as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; + struct kbase_as *as; + struct kbase_mmu_setup *current_setup; + + if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) + return; + + as = &kbdev->as[as_nr]; + current_setup = &as->current_setup; - mmu_get_as_setup(kctx, current_setup); + mmu_get_as_setup(mmut, current_setup); /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as, kctx); + kbase_mmu_hw_configure(kbdev, as); } static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) @@ -111,7 +119,7 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as, NULL); + kbase_mmu_hw_configure(kbdev, as); } static phys_addr_t pte_to_phy_addr(u64 entry) @@ -205,7 +213,8 @@ static struct kbase_mmu_mode const aarch64_mode = { .pte_is_valid = pte_is_valid, .entry_set_ate = entry_set_ate, .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate + .entry_invalidate = entry_invalidate, + .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE }; struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c index 7dc38fcb792b..f6bdf91dc225 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -66,7 +66,7 @@ static inline void page_table_entry_set(u64 *pte, u64 phy) #endif } -static void mmu_get_as_setup(struct kbase_context *kctx, +static void mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup * const setup) { /* Set up the required caching policies at the correct indices @@ -84,7 +84,7 @@ static void mmu_get_as_setup(struct kbase_context *kctx, (AS_MEMATTR_INDEX_OUTER_WA * 8)) | 0; /* The other indices are unused for now */ - setup->transtab = ((u64)kctx->pgd & + setup->transtab = ((u64)mmut->pgd & ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | AS_TRANSTAB_LPAE_ADRMODE_TABLE | AS_TRANSTAB_LPAE_READ_INNER; @@ -92,16 +92,23 @@ static void mmu_get_as_setup(struct kbase_context *kctx, setup->transcfg = 0; } -static void mmu_update(struct kbase_context *kctx) +static void mmu_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr) { - struct kbase_device * const kbdev = kctx->kbdev; - struct kbase_as * const as = &kbdev->as[kctx->as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; + struct kbase_as *as; + struct kbase_mmu_setup *current_setup; + + if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) + return; - mmu_get_as_setup(kctx, current_setup); + as = &kbdev->as[as_nr]; + current_setup = &as->current_setup; + + mmu_get_as_setup(mmut, current_setup); /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as, kctx); + kbase_mmu_hw_configure(kbdev, as); } static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) @@ -112,7 +119,7 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as, NULL); + kbase_mmu_hw_configure(kbdev, as); } static phys_addr_t pte_to_phy_addr(u64 entry) @@ -139,9 +146,17 @@ static int pte_is_valid(u64 pte, unsigned int level) static u64 get_mmu_flags(unsigned long flags) { u64 mmu_flags; - - /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ - mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + unsigned long memattr_idx; + + memattr_idx = KBASE_REG_MEMATTR_VALUE(flags); + if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE, + "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n")) + memattr_idx = AS_MEMATTR_INDEX_DEFAULT; + /* store mem_attr index as 4:2, noting that: + * - macro called above ensures 3 bits already + * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits + */ + mmu_flags = memattr_idx << 2; /* write perm if requested */ mmu_flags |= (flags & KBASE_REG_GPU_WR) ? 
ENTRY_WR_BIT : 0; @@ -189,7 +204,8 @@ static struct kbase_mmu_mode const lpae_mode = { .pte_is_valid = pte_is_valid, .entry_set_ate = entry_set_ate, .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate + .entry_invalidate = entry_invalidate, + .flags = 0 }; struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c index e3cb0b1f38b0..5699eb8feaf2 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_pm.c +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -52,18 +53,9 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; int c; - int old_count; KBASE_DEBUG_ASSERT(kbdev != NULL); - /* Trace timeline information about how long it took to handle the decision - * to powerup. Sometimes the event might be missed due to reading the count - * outside of mutex, but this is necessary to get the trace timing - * correct. */ - old_count = kbdev->pm.active_count; - if (old_count == 0) - kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); - mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); if (kbase_pm_is_suspending(kbdev)) { @@ -75,8 +67,6 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); - if (old_count == 0) - kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); return 1; case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: @@ -87,22 +77,13 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas } } c = ++kbdev->pm.active_count; - KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); - /* Trace the event being handled */ - if (old_count == 0) - kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); - if (c == 1) { /* First context active: Power on the GPU and any cores requested by * the policy */ kbase_hwaccess_pm_gpu_active(kbdev); } -#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ) - if (kbdev->ipa.gpu_active_callback) - kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data); -#endif mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); @@ -116,56 +97,29 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; int c; - int old_count; KBASE_DEBUG_ASSERT(kbdev != NULL); - /* Trace timeline information about how long it took to handle the decision - * to powerdown. Sometimes the event might be missed due to reading the - * count outside of mutex, but this is necessary to get the trace timing - * correct. */ - old_count = kbdev->pm.active_count; - if (old_count == 0) - kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); c = --kbdev->pm.active_count; - KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); KBASE_DEBUG_ASSERT(c >= 0); - /* Trace the event being handled */ - if (old_count == 0) - kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); - if (c == 0) { /* Last context has gone idle */ kbase_hwaccess_pm_gpu_idle(kbdev); /* Wake up anyone waiting for this to become 0 (e.g. 
suspend). The * waiters must synchronize with us by locking the pm.lock after - * waiting */ + * waiting. + */ wake_up(&kbdev->pm.zero_active_count_wait); } -#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ) - /* IPA may be using vinstr, in which case there may be one PM reference - * still held when all other contexts have left the GPU. Inform IPA that - * the GPU is now idle so that vinstr can drop it's reference. - * - * If the GPU was only briefly active then it might have gone idle - * before vinstr has taken a PM reference, meaning that active_count is - * zero. We still need to inform IPA in this case, so that vinstr can - * drop the PM reference and avoid keeping the GPU powered - * unnecessarily. - */ - if (c <= 1 && kbdev->ipa.gpu_idle_callback) - kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data); -#endif - mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); } @@ -176,10 +130,16 @@ void kbase_pm_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); - /* Suspend vinstr. - * This call will block until vinstr is suspended. */ + /* Suspend vinstr. This blocks until the vinstr worker and timer are + * no longer running. + */ kbase_vinstr_suspend(kbdev->vinstr_ctx); + /* Disable GPU hardware counters. + * This call will block until counters are disabled. + */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + mutex_lock(&kbdev->pm.lock); KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); kbdev->pm.suspending = true; @@ -206,6 +166,8 @@ void kbase_pm_suspend(struct kbase_device *kbdev) void kbase_pm_resume(struct kbase_device *kbdev) { + unsigned long flags; + /* MUST happen before any pm_context_active calls occur */ kbase_hwaccess_pm_resume(kbdev); @@ -224,7 +186,11 @@ void kbase_pm_resume(struct kbase_device *kbdev) * need it and the policy doesn't want it on */ kbase_pm_context_idle(kbdev); - /* Resume vinstr operation */ + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Resume vinstr */ kbase_vinstr_resume(kbdev->vinstr_ctx); } - diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h index 8de17e1aca59..59a031467c95 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_pm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -136,6 +136,10 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbas */ void kbase_pm_context_idle(struct kbase_device *kbdev); +/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline + * function + */ + /** * Suspend the GPU and prevent any further register accesses to it from Kernel * threads. diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h deleted file mode 100755 index 15bca79fd64d..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file mali_kbase_profiling_gator_api.h - * Model interface - */ - -#ifndef _KBASE_PROFILING_GATOR_API_H_ -#define _KBASE_PROFILING_GATOR_API_H_ - -/* - * List of possible actions to be controlled by Streamline. - * The following numbers are used by gator to control - * the frame buffer dumping and s/w counter reporting. - */ -#define FBDUMP_CONTROL_ENABLE (1) -#define FBDUMP_CONTROL_RATE (2) -#define SW_COUNTER_ENABLE (3) -#define FBDUMP_CONTROL_RESIZE_FACTOR (4) -#define FBDUMP_CONTROL_MAX (5) -#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE - -void _mali_profiling_control(u32 action, u32 value); - -#endif /* _KBASE_PROFILING_GATOR_API */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 01b30878a827..e762af4a2bcb 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -495,35 +495,6 @@ static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) kbase_js_sched_all(katom->kctx->kbdev); } -struct kbase_debug_copy_buffer { - size_t size; - struct page **pages; - int nr_pages; - size_t offset; - struct kbase_mem_phy_alloc *gpu_alloc; - - struct page **extres_pages; - int nr_extres_pages; -}; - -static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer) -{ - struct page **pages = buffer->extres_pages; - int nr_pages = buffer->nr_extres_pages; - - if (pages) { - int i; - - for (i = 0; i < nr_pages; i++) { - struct page *pg = pages[i]; - - if (pg) - put_page(pg); - } - kfree(pages); - } -} - static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -546,12 +517,15 @@ static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) if (pg) put_page(pg); } - kfree(buffers[i].pages); + if (buffers[i].is_vmalloc) + vfree(buffers[i].pages); + else + kfree(buffers[i].pages); if (gpu_alloc) { switch (gpu_alloc->type) { case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - free_user_buffer(&buffers[i]); + kbase_free_user_buffer(&buffers[i]); break; } default: @@ -613,6 +587,11 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) if (!addr) continue; + if (last_page_addr < page_addr) { + ret = -EINVAL; + goto out_cleanup; + } + buffers[i].nr_pages = nr_pages; buffers[i].offset = addr & ~PAGE_MASK; if (buffers[i].offset >= PAGE_SIZE) { @@ -621,8 +600,17 @@ static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) } buffers[i].size = user_buffers[i].size; - buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), - GFP_KERNEL); + if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / + sizeof(struct page *))) { + buffers[i].is_vmalloc = true; + buffers[i].pages = vzalloc(nr_pages * + sizeof(struct page *)); + } else { + 
buffers[i].is_vmalloc = false; + buffers[i].pages = kcalloc(nr_pages, + sizeof(struct page *), GFP_KERNEL); + } + if (!buffers[i].pages) { ret = -ENOMEM; goto out_cleanup; @@ -713,7 +701,7 @@ out_cleanup: return ret; } -static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, +void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, void *extres_page, struct page **pages, unsigned int nr_pages, unsigned int *target_page_nr, size_t offset, size_t *to_copy) { @@ -755,7 +743,7 @@ static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, kunmap(pages[*target_page_nr]); } -static int kbase_mem_copy_from_extres(struct kbase_context *kctx, +int kbase_mem_copy_from_extres(struct kbase_context *kctx, struct kbase_debug_copy_buffer *buf_data) { unsigned int i; @@ -867,48 +855,22 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } -static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) -{ - __user void *data = (__user void *)(uintptr_t) katom->jc; - struct base_jit_alloc_info *info; - struct kbase_context *kctx = katom->kctx; - int ret; +#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) - /* Fail the job if there is no info structure */ - if (!data) { - ret = -EINVAL; - goto fail; - } - - /* Copy the information for safe access and future storage */ - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(info, data, sizeof(*info)) != 0) { - ret = -EINVAL; - goto free_info; - } - - /* If the ID is zero then fail the job */ - if (info->id == 0) { - ret = -EINVAL; - goto free_info; - } +int kbasep_jit_alloc_validate(struct kbase_context *kctx, + struct base_jit_alloc_info *info) +{ + /* If the ID is zero, then fail the job */ + if (info->id == 0) + return -EINVAL; /* Sanity check that the PA fits within the VA */ - if (info->va_pages < info->commit_pages) { - ret = -EINVAL; - goto free_info; - } + if (info->va_pages < info->commit_pages) + return -EINVAL; /* Ensure the GPU address is correctly aligned */ - if ((info->gpu_alloc_addr & 0x7) != 0) { - ret = -EINVAL; - goto free_info; - } + if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) + return -EINVAL; if (kctx->jit_version == 1) { /* Old JIT didn't have usage_id, max_allocations, bin_id @@ -920,24 +882,67 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) info->flags = 0; memset(info->padding, 0, sizeof(info->padding)); } else { - int i; + int j; /* Check padding is all zeroed */ - for (i = 0; i < sizeof(info->padding); i++) { - if (info->padding[i] != 0) { - ret = -EINVAL; - goto free_info; + for (j = 0; j < sizeof(info->padding); j++) { + if (info->padding[j] != 0) { + return -EINVAL; } } /* No bit other than TILER_ALIGN_TOP shall be set */ if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { - ret = -EINVAL; - goto free_info; + return -EINVAL; } } + return 0; +} + +static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) +{ + __user void *data = (__user void *)(uintptr_t) katom->jc; + struct base_jit_alloc_info *info; + struct kbase_context *kctx = katom->kctx; + u32 count; + int ret; + u32 i; + + /* For backwards compatibility */ + if (katom->nr_extres == 0) + katom->nr_extres = 1; + count = katom->nr_extres; + + /* Sanity checks */ + if (!data || count > kctx->jit_max_allocations || + count > ARRAY_SIZE(kctx->jit_alloc)) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + info = kmalloc_array(count, 
sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + if (copy_from_user(info, data, sizeof(*info)*count) != 0) { + ret = -EINVAL; + goto free_info; + } katom->softjob_data = info; + + for (i = 0; i < count; i++, info++) { + ret = kbasep_jit_alloc_validate(kctx, info); + if (ret) + goto free_info; + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(katom, + info->va_pages, info->commit_pages, info->extent, + info->id, info->bin_id, info->max_allocations, + info->flags, info->usage_id); + } + katom->jit_blocked = false; lockdep_assert_held(&kctx->jctx.lock); @@ -957,18 +962,38 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) return 0; free_info: - kfree(info); + kfree(katom->softjob_data); + katom->softjob_data = NULL; fail: return ret; } -static u8 kbase_jit_free_get_id(struct kbase_jd_atom *katom) +static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom) { if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != BASE_JD_REQ_SOFT_JIT_FREE)) - return 0; + return NULL; + + return (u8 *) katom->softjob_data; +} + +static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct list_head *target_list_head = NULL; + struct kbase_jd_atom *entry; - return (u8) katom->jc; + list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) { + if (katom->age < entry->age) { + target_list_head = &entry->queue; + break; + } + } + + if (target_list_head == NULL) + target_list_head = &kctx->jit_pending_alloc; + + list_add_tail(&katom->queue, target_list_head); } static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) @@ -978,6 +1003,8 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) struct kbase_va_region *reg; struct kbase_vmap_struct mapping; u64 *ptr, new_addr; + u32 count = katom->nr_extres; + u32 i; if (katom->jit_blocked) { list_del(&katom->queue); @@ -985,97 +1012,131 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } info = katom->softjob_data; - if (WARN_ON(!info)) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; return 0; } - /* The JIT ID is still in use so fail the allocation */ - if (kctx->jit_alloc[info->id]) { - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return 0; + for (i = 0; i < count; i++, info++) { + /* The JIT ID is still in use so fail the allocation */ + if (kctx->jit_alloc[info->id]) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } } - /* Create a JIT allocation */ - reg = kbase_jit_allocate(kctx, info); - if (!reg) { - struct kbase_jd_atom *jit_atom; - bool can_block = false; + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + if (kctx->jit_alloc[info->id]) { + /* The JIT ID is duplicated in this atom. Roll back + * previous allocations and fail. 
+ */ + u32 j; - lockdep_assert_held(&kctx->jctx.lock); + info = katom->softjob_data; + for (j = 0; j < i; j++, info++) { + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = + (struct kbase_va_region *) -1; + } - jit_atom = list_first_entry(&kctx->jit_atoms_head, - struct kbase_jd_atom, jit_node); + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } - list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) { - if (jit_atom == katom) - break; - if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == - BASE_JD_REQ_SOFT_JIT_FREE) { - u8 free_id = kbase_jit_free_get_id(jit_atom); - - if (free_id && kctx->jit_alloc[free_id]) { - /* A JIT free which is active and - * submitted before this atom - */ - can_block = true; + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info); + if (!reg) { + struct kbase_jd_atom *jit_atom; + bool can_block = false; + + lockdep_assert_held(&kctx->jctx.lock); + + jit_atom = list_first_entry(&kctx->jit_atoms_head, + struct kbase_jd_atom, jit_node); + + list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) { + if (jit_atom == katom) break; + + if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == + BASE_JD_REQ_SOFT_JIT_FREE) { + u8 *free_ids = kbase_jit_free_get_ids(jit_atom); + + if (free_ids && *free_ids && + kctx->jit_alloc[*free_ids]) { + /* A JIT free which is active and + * submitted before this atom + */ + can_block = true; + break; + } } } - } - if (!can_block) { - /* Mark the allocation so we know it's in use even if - * the allocation itself fails. + if (!can_block) { + /* Mark the failed allocation as well as the + * other un-attempted allocations in the set, + * so we know they are in use even if the + * allocation itself failed. + */ + for (; i < count; i++, info++) { + kctx->jit_alloc[info->id] = + (struct kbase_va_region *) -1; + } + + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } + + /* There are pending frees for an active allocation + * so we should wait to see whether they free the + * memory. Add to the list of atoms for which JIT + * allocation is pending. */ - kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; + kbase_jit_add_to_pending_alloc_list(katom); + katom->jit_blocked = true; + + /* Rollback, the whole set will be re-attempted */ + while (i-- > 0) { + info--; + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = NULL; + } - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return 0; + return 1; } - /* There are pending frees for an active allocation - * so we should wait to see whether they free the memory. - * Add to the beginning of the list to ensure that the atom is - * processed only once in kbase_jit_free_finish - */ - list_add(&katom->queue, &kctx->jit_pending_alloc); - katom->jit_blocked = true; - - return 1; + /* Bind it to the user provided ID. */ + kctx->jit_alloc[info->id] = reg; } - /* - * Write the address of the JIT allocation to the user provided - * GPU allocation. - */ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); - if (!ptr) { + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { /* - * Leave the allocation "live" as the JIT free jit will be - * submitted anyway. + * Write the address of the JIT allocation to the user provided + * GPU allocation. 
*/ - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return 0; - } + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + /* + * Leave the allocations "live" as the JIT free atom + * will be submitted anyway. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return 0; + } - new_addr = reg->start_pfn << PAGE_SHIFT; - *ptr = new_addr; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( - katom, info->gpu_alloc_addr, new_addr); - kbase_vunmap(kctx, &mapping); + reg = kctx->jit_alloc[info->id]; + new_addr = reg->start_pfn << PAGE_SHIFT; + *ptr = new_addr; + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(katom, + info->gpu_alloc_addr, + new_addr, info->va_pages); + kbase_vunmap(kctx, &mapping); + } katom->event_code = BASE_JD_EVENT_DONE; - /* - * Bind it to the user provided ID. Do this last so we can check for - * the JIT free racing this JIT alloc job. - */ - kctx->jit_alloc[info->id] = reg; - return 0; } @@ -1085,6 +1146,9 @@ static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) lockdep_assert_held(&katom->kctx->jctx.lock); + if (WARN_ON(!katom->softjob_data)) + return; + /* Remove atom from jit_atoms_head list */ list_del(&katom->jit_node); @@ -1101,34 +1165,79 @@ static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; + __user void *data = (__user void *)(uintptr_t) katom->jc; + u8 *ids; + u32 count = MAX(katom->nr_extres, 1); + u32 i; + int ret; + + /* Sanity checks */ + if (count > ARRAY_SIZE(kctx->jit_alloc)) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); + if (!ids) { + ret = -ENOMEM; + goto fail; + } lockdep_assert_held(&kctx->jctx.lock); + katom->softjob_data = ids; + + /* For backwards compatibility */ + if (katom->nr_extres) { + /* Fail the job if there is no list of ids */ + if (!data) { + ret = -EINVAL; + goto free_info; + } + + if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { + ret = -EINVAL; + goto free_info; + } + } else { + katom->nr_extres = 1; + *ids = (u8)katom->jc; + } + for (i = 0; i < count; i++) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(katom, ids[i]); + list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); return 0; + +free_info: + kfree(katom->softjob_data); + katom->softjob_data = NULL; +fail: + return ret; } static void kbase_jit_free_process(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - u8 id = kbase_jit_free_get_id(katom); + u8 *ids = kbase_jit_free_get_ids(katom); + u32 count = katom->nr_extres; + u32 i; - /* - * If the ID is zero or it is not in use yet then fail the job. - */ - if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { + if (ids == NULL) { katom->event_code = BASE_JD_EVENT_JOB_INVALID; return; } - /* - * If the ID is valid but the allocation request failed still succeed - * this soft job but don't try and free the allocation. - */ - if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) - kbase_jit_free(kctx, kctx->jit_alloc[id]); - - kctx->jit_alloc[id] = NULL; + for (i = 0; i < count; i++, ids++) { + /* + * If the ID is zero or it is not in use yet then fail the job. 
+ */ + if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + } } static void kbasep_jit_free_finish_worker(struct work_struct *work) @@ -1151,12 +1260,39 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom) { struct list_head *i, *tmp; struct kbase_context *kctx = katom->kctx; + LIST_HEAD(jit_pending_alloc_list); + u8 *ids; + size_t j; lockdep_assert_held(&kctx->jctx.lock); + + ids = kbase_jit_free_get_ids(katom); + if (WARN_ON(ids == NULL)) { + return; + } + /* Remove this atom from the kctx->jit_atoms_head list */ list_del(&katom->jit_node); - list_for_each_safe(i, tmp, &kctx->jit_pending_alloc) { + for (j = 0; j != katom->nr_extres; ++j) { + if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) { + /* + * If the ID is valid but the allocation request failed + * still succeed this soft job but don't try and free + * the allocation. + */ + if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) + kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); + + kctx->jit_alloc[ids[j]] = NULL; + } + } + /* Free the list of ids */ + kfree(ids); + + list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list); + + list_for_each_safe(i, tmp, &jit_pending_alloc_list) { struct kbase_jd_atom *pending_atom = list_entry(i, struct kbase_jd_atom, queue); if (kbase_jit_allocate_process(pending_atom) == 0) { @@ -1295,11 +1431,14 @@ static void kbase_ext_res_finish(struct kbase_jd_atom *katom) int kbase_process_soft_job(struct kbase_jd_atom *katom) { + int ret = 0; + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: - return kbase_dump_cpu_gpu_time(katom); + ret = kbase_dump_cpu_gpu_time(katom); + break; #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) case BASE_JD_REQ_SOFT_FENCE_TRIGGER: @@ -1309,7 +1448,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) break; case BASE_JD_REQ_SOFT_FENCE_WAIT: { - int ret = kbase_sync_fence_in_wait(katom); + ret = kbase_sync_fence_in_wait(katom); if (ret == 1) { #ifdef CONFIG_MALI_FENCE_DEBUG @@ -1318,14 +1457,16 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) kbasep_add_waiting_soft_job(katom); #endif } - return ret; + break; } #endif case BASE_JD_REQ_SOFT_REPLAY: - return kbase_replay_process(katom); + ret = kbase_replay_process(katom); + break; case BASE_JD_REQ_SOFT_EVENT_WAIT: - return kbasep_soft_event_wait(katom); + ret = kbasep_soft_event_wait(katom); + break; case BASE_JD_REQ_SOFT_EVENT_SET: kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); break; @@ -1341,7 +1482,8 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) break; } case BASE_JD_REQ_SOFT_JIT_ALLOC: - return kbase_jit_allocate_process(katom); + ret = kbase_jit_allocate_process(katom); + break; case BASE_JD_REQ_SOFT_JIT_FREE: kbase_jit_free_process(katom); break; @@ -1354,7 +1496,8 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) } /* Atom is complete */ - return 0; + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom); + return ret; } void kbase_cancel_soft_job(struct kbase_jd_atom *katom) @@ -1460,7 +1603,6 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom); switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h 
b/drivers/gpu/arm/midgard/mali_kbase_sync.h index a7690b2e4463..70557dd5b33f 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_sync.h +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -161,7 +161,11 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); */ static inline void kbase_sync_fence_close_fd(int fd) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + ksys_close(fd); +#else sys_close(fd); +#endif } /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c index 9520f5ac3b5e..5239daee409e 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c +++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,7 @@ #include #include "mali_kbase.h" +#include "mali_kbase_sync.h" void kbase_sync_fence_wait_worker(struct work_struct *data) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 2ff45f50bf16..10e38897514b 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -143,14 +143,13 @@ enum tl_msg_id_obj { KBASE_TL_NRET_AS_CTX, KBASE_TL_RET_ATOM_AS, KBASE_TL_NRET_ATOM_AS, - KBASE_TL_DEP_ATOM_ATOM, - KBASE_TL_NDEP_ATOM_ATOM, - KBASE_TL_RDEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, KBASE_TL_ATTRIB_ATOM_PRIORITY, KBASE_TL_ATTRIB_ATOM_STATE, KBASE_TL_ATTRIB_ATOM_PRIORITIZED, KBASE_TL_ATTRIB_ATOM_JIT, + KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, + KBASE_TL_ATTRIB_ATOM_JITFREEINFO, KBASE_TL_ATTRIB_AS_CONFIG, KBASE_TL_EVENT_LPU_SOFTSTOP, KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, @@ -171,7 +170,8 @@ enum tl_msg_id_aux { KBASE_AUX_PROTECTED_ENTER_START, KBASE_AUX_PROTECTED_ENTER_END, KBASE_AUX_PROTECTED_LEAVE_START, - KBASE_AUX_PROTECTED_LEAVE_END + KBASE_AUX_PROTECTED_LEAVE_END, + KBASE_AUX_JIT_STATS, }; /*****************************************************************************/ @@ -417,27 +417,6 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom,address_space" }, - { - KBASE_TL_DEP_ATOM_ATOM, - __stringify(KBASE_TL_DEP_ATOM_ATOM), - "atom2 depends on atom1", - "@pp", - "atom1,atom2" - }, - { - KBASE_TL_NDEP_ATOM_ATOM, - __stringify(KBASE_TL_NDEP_ATOM_ATOM), - "atom2 no longer depends on atom1", - "@pp", - "atom1,atom2" - }, - { - KBASE_TL_RDEP_ATOM_ATOM, - __stringify(KBASE_TL_RDEP_ATOM_ATOM), - "resolved dependecy of atom2 depending on atom1", - "@pp", - "atom1,atom2" - }, { KBASE_TL_ATTRIB_ATOM_CONFIG, __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), @@ -470,8 +449,22 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_ATTRIB_ATOM_JIT, __stringify(KBASE_TL_ATTRIB_ATOM_JIT), "jit done for atom", - "@pLL", - "atom,edit_addr,new_addr" + "@pLLL", + "atom,edit_addr,new_addr,va_pages" + }, + { + KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, + __stringify(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO), + "Information about JIT allocations", + "@pLLLIIIII", + "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,flags,usg_id" + }, + { + 
KBASE_TL_ATTRIB_ATOM_JITFREEINFO, + __stringify(KBASE_TL_ATTRIB_ATOM_JITFREEINFO), + "Information about JIT frees", + "@pI", + "atom,j_id" }, { KBASE_TL_ATTRIB_AS_CONFIG, @@ -581,6 +574,13 @@ static const struct tp_desc tp_desc_aux[] = { "leave protected mode end", "@p", "gpu" + }, + { + KBASE_AUX_JIT_STATS, + __stringify(KBASE_AUX_JIT_STATS), + "per-bin JIT statistics", + "@IIIIII", + "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages" } }; @@ -899,7 +899,6 @@ static size_t kbasep_tlstream_msgbuf_submit( unsigned int wb_idx_raw, unsigned int wb_size) { - unsigned int rb_idx_raw = atomic_read(&stream->rbi); unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; /* Set stream as flushed. */ @@ -918,23 +917,11 @@ static size_t kbasep_tlstream_msgbuf_submit( * As stream->lock is not taken on reader side we must make sure memory * is updated correctly before this will happen. */ smp_wmb(); - wb_idx_raw++; - atomic_set(&stream->wbi, wb_idx_raw); + atomic_inc(&stream->wbi); /* Inform user that packets are ready for reading. */ wake_up_interruptible(&tl_event_queue); - /* Detect and mark overflow in this stream. */ - if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) { - /* Reader side depends on this increment to correctly handle - * overflows. The value shall be updated only if it was not - * modified by the reader. The data holding buffer will not be - * updated before stream->lock is released, however size of the - * buffer will. Make sure this increment is globally visible - * before information about selected write buffer size. */ - atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1); - } - wb_size = PACKET_HEADER_SIZE; if (stream->numbered) wb_size += PACKET_NUMBER_SIZE; @@ -1191,6 +1178,7 @@ static ssize_t kbasep_tlstream_read( while (copy_len < size) { enum tl_stream_type stype; unsigned int rb_idx_raw = 0; + unsigned int wb_idx_raw; unsigned int rb_idx; size_t rb_size; @@ -1227,18 +1215,26 @@ static ssize_t kbasep_tlstream_read( break; } - /* If the rbi still points to the packet we just processed - * then there was no overflow so we add the copied size to - * copy_len and move rbi on to the next packet + /* If the distance between read buffer index and write + * buffer index became more than PACKET_COUNT, then overflow + * happened and we need to ignore the last portion of bytes + * that we have just sent to user. 
*/ smp_rmb(); - if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) { + wb_idx_raw = atomic_read(&tl_stream[stype]->wbi); + + if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { copy_len += rb_size; atomic_inc(&tl_stream[stype]->rbi); - #if MALI_UNIT_TEST atomic_add(rb_size, &tlstream_bytes_collected); #endif /* MALI_UNIT_TEST */ + + } else { + const unsigned int new_rb_idx_raw = + wb_idx_raw - PACKET_COUNT + 1; + /* Adjust read buffer index to the next valid buffer */ + atomic_set(&tl_stream[stype]->rbi, new_rb_idx_raw); } } @@ -1947,81 +1943,6 @@ void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) -{ - const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom1, sizeof(atom1)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom2, sizeof(atom2)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2) -{ - const u32 msg_id = KBASE_TL_NDEP_ATOM_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom1, sizeof(atom1)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom2, sizeof(atom2)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2) -{ - const u32 msg_id = KBASE_TL_RDEP_ATOM_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom1, sizeof(atom1)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom2, sizeof(atom2)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; @@ -2252,12 +2173,12 @@ void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom) } void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr) + void *atom, u64 edit_addr, u64 new_addr, u64 va_pages) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) - + sizeof(edit_addr) + sizeof(new_addr); + + sizeof(edit_addr) + sizeof(new_addr) + sizeof(va_pages); unsigned 
long flags; char *buffer; size_t pos = 0; @@ -2275,11 +2196,89 @@ void __kbase_tlstream_tl_attrib_atom_jit( buffer, pos, &edit_addr, sizeof(edit_addr)); pos = kbasep_tlstream_write_bytes( buffer, pos, &new_addr, sizeof(new_addr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &va_pages, sizeof(va_pages)); + + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + void *atom, u64 va_pages, u64 commit_pages, u64 extent, + u32 jit_id, u32 bin_id, u32 max_allocations, u32 jit_flags, + u32 usage_id) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + + sizeof(va_pages) + sizeof(commit_pages) + + sizeof(extent) + sizeof(jit_id) + + sizeof(bin_id) + sizeof(max_allocations) + + sizeof(jit_flags) + sizeof(usage_id); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, + sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &va_pages, sizeof(va_pages)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &commit_pages, sizeof(commit_pages)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &extent, sizeof(extent)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &jit_id, sizeof(jit_id)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &bin_id, sizeof(bin_id)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &max_allocations, + sizeof(max_allocations)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &jit_flags, sizeof(jit_flags)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &usage_id, sizeof(usage_id)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(jit_id); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, + sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &jit_id, sizeof(jit_id)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } + void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { @@ -2636,3 +2635,40 @@ void __kbase_tlstream_aux_protected_leave_end(void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } + +void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid, + u32 max_allocs, u32 allocs, + u32 va_pages, u32 ph_pages) +{ + const u32 msg_id = KBASE_AUX_JIT_STATS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + sizeof(bid) + + sizeof(max_allocs) + sizeof(allocs) + + sizeof(va_pages) + sizeof(ph_pages); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, + 
msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &bid, sizeof(bid)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &max_allocs, sizeof(max_allocs)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &allocs, sizeof(allocs)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &va_pages, sizeof(va_pages)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &ph_pages, sizeof(ph_pages)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index bfa25d98264a..e2a3ea46a871 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -141,7 +141,12 @@ void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom); void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr); + void *atom, u64 edit_addr, u64 new_addr, u64 va_pages); +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + void *atom, u64 va_pages, u64 commit_pages, u64 extent, + u32 jit_id, u32 bin_id, u32 max_allocations, u32 flags, + u32 usage_id); +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id); void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg); void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); @@ -158,6 +163,9 @@ void __kbase_tlstream_aux_protected_enter_start(void *gpu); void __kbase_tlstream_aux_protected_enter_end(void *gpu); void __kbase_tlstream_aux_protected_leave_start(void *gpu); void __kbase_tlstream_aux_protected_leave_end(void *gpu); +void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bin_id, + u32 max_allocations, u32 allocations, + u32 va_pages_nr, u32 ph_pages_nr); #define TLSTREAM_ENABLED (1 << 31) @@ -421,39 +429,6 @@ extern atomic_t kbase_tlstream_enabled; #define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \ __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) -/** - * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom - * @atom1: name of the child atom object - * @atom2: name of the parent atom object that depends on child atom - * - * Function emits a timeline message informing that parent atom waits for - * child atom object to be completed before start its execution. - */ -#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \ - __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) - -/** - * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved - * @atom1: name of the child atom object - * @atom2: name of the parent atom object that depended on child atom - * - * Function emits a timeline message informing that parent atom execution - * dependency on child atom has been resolved. 
- */ -#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \ - __TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2) - -/** - * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms - * @atom1: name of the child atom object - * @atom2: name of the parent atom object that depended on child atom - * - * Function emits a timeline message informing that parent atom execution - * dependency on child atom has been resolved. - */ -#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \ - __TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2) - /** * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes * @atom: name of the atom object @@ -500,9 +475,51 @@ extern atomic_t kbase_tlstream_enabled; * @atom: atom identifier * @edit_addr: address edited by jit * @new_addr: address placed into the edited location + * @va_pages: maximum number of pages this jit can allocate */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \ - __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr) +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr, va_pages) \ + __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, \ + new_addr, va_pages) + +/** + * Information about the JIT allocation atom. + * + * @atom: Atom identifier. + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @bin_id: The JIT allocation bin, used in conjunction with + * @max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocations: The maximum number of allocations allowed within + * the bin specified by @bin_id. Should be the same + * for all JIT allocations within the same bin. + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @usage_id: A hint about which allocation should be reused. + * The kernel should attempt to use a previous + * allocation with the same usage_id + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(atom, va_pages,\ + commit_pages, extent, jit_id, bin_id,\ + max_allocations, jit_flags, usage_id) \ + __TRACE_IF_ENABLED(tl_attrib_atom_jitallocinfo, atom, va_pages,\ + commit_pages, extent, jit_id, bin_id,\ + max_allocations, jit_flags, usage_id) + +/** + * Information about the JIT free atom. + * + * @atom: Atom identifier. + * @jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(atom, jit_id) \ + __TRACE_IF_ENABLED(tl_attrib_atom_jitfreeinfo, atom, jit_id) /** * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes @@ -640,5 +657,24 @@ extern atomic_t kbase_tlstream_enabled; #define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \ __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu) +/** + * KBASE_TLSTREAM_AUX_JIT_STATS - JIT allocations per bin statistics + * + * @ctx_nr: kernel context number + * @bid: JIT bin id + * @max_allocs: maximum allocations allowed in this bin. + * UINT_MAX is a special value. It denotes that + * the parameter was not changed since the last time. 
+ * @allocs: number of active allocations in this bin
+ * @va_pages: number of virtual pages allocated in this bin
+ * @ph_pages: number of physical pages allocated in this bin
+ *
+ * Function emits a timeline message indicating the JIT statistics
+ * for a given bin have changed.
+ */
+#define KBASE_TLSTREAM_AUX_JIT_STATS(ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages) \
+	__TRACE_IF_ENABLED(aux_jit_stats, ctx_nr, bid, \
+			max_allocs, allocs, \
+			va_pages, ph_pages)

 #endif /* _KBASE_TLSTREAM_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
index 32fffe0d80a9..77fb8183a3d1 100755
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -172,8 +172,6 @@ int dummy_array[] = {
 	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
 /* gpu_addr==value to write into JS_HEAD */
 	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
-	/* kctx is the one being evicted, info_val == kctx to put in */
-	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
 	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
 /* info_val == lower 32 bits of affinity */
 	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
@@ -236,14 +234,8 @@ int dummy_array[] = {
 /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
 	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
-	KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED),
+	KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
 	KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
deleted file mode 100755
index ee6bdf8ae324..000000000000
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include -#include -#include - -#define CREATE_TRACE_POINTS - -#ifdef CONFIG_MALI_TRACE_TIMELINE -#include "mali_timeline.h" - -#include -#include - -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active); - -struct kbase_trace_timeline_desc { - char *enum_str; - char *desc; - char *format; - char *format_desc; -}; - -static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = { - #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc } - #include "mali_kbase_trace_timeline_defs.h" - #undef KBASE_TIMELINE_TRACE_CODE -}; - -#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table) - -static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos) -{ - if (*pos >= KBASE_NR_TRACE_CODES) - return NULL; - - return &kbase_trace_timeline_desc_table[*pos]; -} - -static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data) -{ -} - -static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos) -{ - (*pos)++; - - if (*pos == KBASE_NR_TRACE_CODES) - return NULL; - - return &kbase_trace_timeline_desc_table[*pos]; -} - -static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data) -{ - struct kbase_trace_timeline_desc *trace_desc = data; - - seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc); - return 0; -} - - -static const struct seq_operations kbasep_trace_timeline_seq_ops = { - .start = kbasep_trace_timeline_seq_start, - .next = kbasep_trace_timeline_seq_next, - .stop = kbasep_trace_timeline_seq_stop, - .show = kbasep_trace_timeline_seq_show, -}; - -static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &kbasep_trace_timeline_seq_ops); -} - -static const struct file_operations kbasep_trace_timeline_debugfs_fops = { - .open = kbasep_trace_timeline_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#ifdef CONFIG_DEBUG_FS - -void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("mali_timeline_defs", - S_IRUGO, kbdev->mali_debugfs_directory, NULL, - &kbasep_trace_timeline_debugfs_fops); -} - -#endif /* CONFIG_DEBUG_FS */ - -void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbdev->timeline.slot_atoms_submitted[js] > 0) { - KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); - } else { - base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); - - KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1); - KBASE_TIMELINE_JOB_START(kctx, js, atom_number); - } - 
++kbdev->timeline.slot_atoms_submitted[js]; - - KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); -} - -void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, - kbasep_js_atom_done_code done_code) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { - KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); - } else { - /* Job finished in JS_HEAD */ - base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); - - KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0); - KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number); - - /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */ - if (kbase_backend_nr_atoms_submitted(kbdev, js)) { - struct kbase_jd_atom *next_katom; - struct kbase_context *next_kctx; - - /* Peek the next atom - note that the atom in JS_HEAD will already - * have been dequeued */ - next_katom = kbase_backend_inspect_head(kbdev, js); - WARN_ON(!next_katom); - next_kctx = next_katom->kctx; - KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0); - KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1); - KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom)); - } - } - - --kbdev->timeline.slot_atoms_submitted[js]; - - KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); -} - -void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) -{ - int uid = 0; - int old_uid; - - /* If a producer already exists for the event, try to use their UID (multiple-producers) */ - uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]); - old_uid = uid; - - /* Get a new non-zero UID if we don't have one yet */ - while (!uid) - uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter); - - /* Try to use this UID */ - if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid)) - /* If it changed, raced with another producer: we've lost this UID */ - uid = 0; - - KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid); -} - -void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -{ - int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); - - if (uid != 0) { - if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) - /* If it changed, raced with another consumer: we've lost this UID */ - uid = 0; - - KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); - } -} - -void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -{ - int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); - - if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) - /* If it changed, raced with another consumer: we've lost this UID */ - uid = 0; - - KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); -} - -void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - /* Simply log the start of the transition */ - kbdev->timeline.l2_transitioning = true; - KBASE_TIMELINE_POWERING_L2(kbdev); -} - -void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - /* Simply log the end of the transition */ - if (kbdev->timeline.l2_transitioning) { - kbdev->timeline.l2_transitioning = false; - KBASE_TIMELINE_POWERED_L2(kbdev); - } -} - -#endif /* CONFIG_MALI_TRACE_TIMELINE */ diff --git 
a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h deleted file mode 100755 index c1a3dfc56752..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +++ /dev/null @@ -1,368 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#if !defined(_KBASE_TRACE_TIMELINE_H) -#define _KBASE_TRACE_TIMELINE_H - -#ifdef CONFIG_MALI_TRACE_TIMELINE - -enum kbase_trace_timeline_code { - #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val - #include "mali_kbase_trace_timeline_defs.h" - #undef KBASE_TIMELINE_TRACE_CODE -}; - -#ifdef CONFIG_DEBUG_FS - -/** Initialize Timeline DebugFS entries */ -void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); - -#else /* CONFIG_DEBUG_FS */ - -#define kbasep_trace_timeline_debugfs_init CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ - -/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE - * functions. - * Output is timestamped by either sched_clock() (default), local_clock(), or - * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */ -#include "mali_timeline.h" - -/* Trace number of atoms in flight for kctx (atoms either not completed, or in - process of being returned to user */ -#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \ - (int)kctx->timeline.owner_tgid, \ - count); \ - } while (0) - -/* Trace atom_id being Ready to Run */ -#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \ - CTX_FLOW_ATOM_READY, \ - (int)kctx->timeline.owner_tgid, \ - atom_id); \ - } while (0) - -/* Trace number of atoms submitted to job slot js - * - * NOTE: This uses a different tracepoint to the head/next/soft-stop actions, - * so that those actions can be filtered out separately from this - * - * This is because this is more useful, as we can use it to calculate general - * utilization easily and accurately */ -#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_SLOT_ACTIVE, \ - (int)kctx->timeline.owner_tgid, \ - js, count); \ - } while (0) - - -/* Trace atoms present in JS_NEXT */ -#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_SLOT_NEXT, \ - (int)kctx->timeline.owner_tgid, \ - js, count); \ - } while (0) - -/* Trace atoms present in JS_HEAD */ -#define 
KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_SLOT_HEAD, \ - (int)kctx->timeline.owner_tgid, \ - js, count); \ - } while (0) - -/* Trace that a soft stop/evict from next is being attempted on a slot */ -#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_SLOT_STOPPING, \ - (kctx) ? (int)kctx->timeline.owner_tgid : 0, \ - js, count); \ - } while (0) - - - -/* Trace state of overall GPU power */ -#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_POWER_ACTIVE, active); \ - } while (0) - -/* Trace state of tiler power */ -#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_POWER_TILER_ACTIVE, \ - hweight64(bitmap)); \ - } while (0) - -/* Trace number of shaders currently powered */ -#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_POWER_SHADER_ACTIVE, \ - hweight64(bitmap)); \ - } while (0) - -/* Trace state of L2 power */ -#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ - SW_SET_GPU_POWER_L2_ACTIVE, \ - hweight64(bitmap)); \ - } while (0) - -/* Trace state of L2 cache*/ -#define KBASE_TIMELINE_POWERING_L2(kbdev) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ - SW_FLOW_GPU_POWER_L2_POWERING, \ - 1); \ - } while (0) - -#define KBASE_TIMELINE_POWERED_L2(kbdev) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ - SW_FLOW_GPU_POWER_L2_ACTIVE, \ - 1); \ - } while (0) - -/* Trace kbase_pm_send_event message send */ -#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ - SW_FLOW_PM_SEND_EVENT, \ - event_type, pm_event_id); \ - } while (0) - -/* Trace kbase_pm_worker message receive */ -#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ - SW_FLOW_PM_HANDLE_EVENT, \ - event_type, pm_event_id); \ - } while (0) - - -/* Trace atom_id starting in JS_HEAD */ -#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ - HW_START_GPU_JOB_CHAIN_SW_APPROX, \ - (int)kctx->timeline.owner_tgid, \ - js, _consumerof_atom_number); \ - } while (0) - -/* Trace atom_id stopping on JS_HEAD */ -#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ - HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \ - (int)kctx->timeline.owner_tgid, \ - js, 
_producerof_atom_number_completed); \ - } while (0) - -/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a - * certin caller */ -#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \ - trace_code, 1); \ - } while (0) - -/* Trace number of contexts active */ -#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \ - do { \ - struct timespec ts; \ - getrawmonotonic(&ts); \ - trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \ - count); \ - } while (0) - -/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */ - -/** - * Trace that an atom is starting on a job slot - * - * The caller must be holding hwaccess_lock - */ -void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js); - -/** - * Trace that an atom has done on a job slot - * - * 'Done' in this sense can occur either because: - * - the atom in JS_HEAD finished - * - the atom in JS_NEXT was evicted - * - * Whether the atom finished or was evicted is passed in @a done_code - * - * It is assumed that the atom has already been removed from the submit slot, - * with either: - * - kbasep_jm_dequeue_submit_slot() - * - kbasep_jm_dequeue_tail_submit_slot() - * - * The caller must be holding hwaccess_lock - */ -void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, - kbasep_js_atom_done_code done_code); - - -/** Trace a pm event starting */ -void kbase_timeline_pm_send_event(struct kbase_device *kbdev, - enum kbase_timeline_pm_event event_sent); - -/** Trace a pm event finishing */ -void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event); - -/** Check whether a pm event was present, and if so trace finishing it */ -void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event); - -/** Trace L2 power-up start */ -void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev); - -/** Trace L2 power-up done */ -void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); - -#else - -#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP() - -#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP() - -#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP() - -#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP() - -#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP() - -#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP() - -#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP() - -#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP() - -#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP() - -#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP() - -#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP() - -#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP() - -#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() - -#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP() - -#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP() - -#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP() - -#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP() - -#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() - -static inline void 
kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); -} - -static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, - kbasep_js_atom_done_code done_code) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); -} - -static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) -{ -} - -static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -{ -} - -static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) -{ -} - -static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) -{ -} - -static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) -{ -} -#endif /* CONFIG_MALI_TRACE_TIMELINE */ - -#endif /* _KBASE_TRACE_TIMELINE_H */ - diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h deleted file mode 100755 index 114bcac541e9..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** - * ***** DO NOT INCLUDE DIRECTLY ***** - * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ - -/* - * Conventions on Event Names: - * - * - The prefix determines something about how the timeline should be - * displayed, and is split up into various parts, separated by underscores: - * - 'SW' and 'HW' as the first part will be used to determine whether a - * timeline is to do with Software or Hardware - effectively, separate - * 'channels' for Software and Hardware - * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and - * signify related pairs of events - these are optional. - * - 'FLOW' indicates a generic event, which can use dependencies - * - This gives events such as: - * - 'SW_ENTER_FOO' - * - 'SW_LEAVE_FOO' - * - 'SW_FLOW_BAR_1' - * - 'SW_FLOW_BAR_2' - * - 'HW_START_BAZ' - * - 'HW_STOP_BAZ' - * - And an unadorned HW event: - * - 'HW_BAZ_FROZBOZ' - */ - -/* - * Conventions on parameter names: - * - anything with 'instance' in the name will have a separate timeline based - * on that instances. - * - underscored-prefixed parameters will by hidden by default on timelines - * - * Hence: - * - Different job slots have their own 'instance', based on the instance value - * - Per-context info (e.g. atoms on a context) have their own 'instance' - * (i.e. 
each context should be on a different timeline) - * - * Note that globally-shared resources can be tagged with a tgid, but we don't - * want an instance per context: - * - There's no point having separate Job Slot timelines for each context, that - * would be confusing - there's only really 3 job slots! - * - There's no point having separate Shader-powered timelines for each - * context, that would be confusing - all shader cores (whether it be 4, 8, - * etc) are shared in the system. - */ - - /* - * CTX events - */ - /* Separate timelines for each context 'instance'*/ - KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"), - KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"), - - /* - * SW Events - */ - /* Separate timelines for each slot 'instance' */ - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"), - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"), - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"), - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"), - /* Shader and overall power is shared - can't have separate instances of - * it, just tagging with the context */ - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"), - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"), - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"), - KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"), - - /* SW Power event messaging. 
_event_type is one from the kbase_pm_event enum */ - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"), - /* SW L2 power events */ - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"), - - KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"), - - /* - * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock() - */ - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"), - - /* - * Significant Indirect callers of kbase_pm_check_transitions_nolock() - */ - /* kbase_pm_request_cores */ - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"), - /* kbase_pm_release_cores */ - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"), - 
KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"), - KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"), - /* - * END: SW Functions that call kbase_pm_check_transitions_nolock() - */ - - /* - * HW Events - */ - KBASE_TIMELINE_TRACE_CODE(HW_MMU_FAULT, -"HW: MMU Fault", "%d,%d,%d", "_tgid,fault_type,fault_stage,asid"), - KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, -"HW: Job Chain start (SW approximated)", "%d,%d,%d", -"_tgid,job_slot,_consumerof_atom_number_ready"), - KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, -"HW: Job Chain stop (SW approximated)", "%d,%d,%d", -"_tgid,job_slot,_producerof_atom_number_completed") diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c deleted file mode 100755 index 3ea234aabeec..000000000000 --- a/drivers/gpu/arm/midgard/mali_kbase_utility.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include - -bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry) -{ - struct list_head *pos = base->next; - - while (pos != base) { - if (pos == entry) - return true; - - pos = pos->next; - } - return false; -} diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h index f2e5a3381e13..8d4f044376a9 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_utility.h +++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h @@ -29,17 +29,6 @@ #error "Don't include this file directly, use mali_kbase.h instead" #endif -/** Test whether the given list entry is a member of the given list. - * - * @param base The head of the list to be tested - * @param entry The list entry to be tested - * - * @return true if entry is a member of base - * false otherwise - */ -bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); - - static inline void kbase_timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *timer)) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index 60e1800e4e47..51cb3651ed9a 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -20,215 +20,109 @@ * */ +#include "mali_kbase_vinstr.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_reader.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" + #include -#include +#include +#include #include -#include -#include -#include #include +#include #include -#include #include -#include - -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_MALI_NO_MALI -#include -#endif - -/*****************************************************************************/ +#include /* Hwcnt reader API version */ -#define HWCNT_READER_API 1 - -/* The number of nanoseconds in a second. */ -#define NSECS_IN_SEC 1000000000ull /* ns */ - -/* The time resolution of dumping service. */ -#define DUMPING_RESOLUTION 500000ull /* ns */ - -/* The maximal supported number of dumping buffers. */ -#define MAX_BUFFER_COUNT 32 - -/* Size and number of hw counters blocks. */ -#define NR_CNT_BLOCKS_PER_GROUP 8 -#define NR_CNT_PER_BLOCK 64 -#define NR_BYTES_PER_CNT 4 -#define NR_BYTES_PER_HDR 16 -#define PRFCNT_EN_MASK_OFFSET 0x8 - -/*****************************************************************************/ +#define HWCNT_READER_API 1 -enum { - SHADER_HWCNT_BM, - TILER_HWCNT_BM, - MMU_L2_HWCNT_BM, - JM_HWCNT_BM -}; +/* The minimum allowed interval between dumps (equivalent to 10KHz) */ +#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) -enum vinstr_state { - VINSTR_IDLE, - VINSTR_DUMPING, - VINSTR_SUSPENDING, - VINSTR_SUSPENDED, - VINSTR_RESUMING -}; +/* The maximum allowed buffers per client */ +#define MAX_BUFFER_COUNT 32 /** - * struct kbase_vinstr_context - vinstr context per device - * @lock: protects the entire vinstr context, but the list of - * vinstr clients can be updated outside the lock using - * @state_lock. 
- * @kbdev: pointer to kbase device - * @kctx: pointer to kbase context - * @vmap: vinstr vmap for mapping hwcnt dump buffer - * @gpu_va: GPU hwcnt dump buffer address - * @cpu_va: the CPU side mapping of the hwcnt dump buffer - * @dump_size: size of the dump buffer in bytes - * @bitmap: current set of counters monitored, not always in sync - * with hardware - * @reprogram: when true, reprogram hwcnt block with the new set of - * counters - * @state: vinstr state - * @state_lock: protects information about vinstr state and list of - * clients. - * @suspend_waitq: notification queue to trigger state re-validation - * @suspend_cnt: reference counter of vinstr's suspend state - * @suspend_work: worker to execute on entering suspended state - * @resume_work: worker to execute on leaving suspended state - * @nclients: number of attached clients, pending or idle - * @nclients_suspended: number of attached but suspended clients - * @waiting_clients: head of list of clients being periodically sampled - * @idle_clients: head of list of clients being idle - * @suspended_clients: head of list of clients being suspended - * @thread: periodic sampling thread - * @waitq: notification queue of sampling thread - * @request_pending: request for action for sampling thread - * @clients_present: when true, we have at least one client - * Note: this variable is in sync. with nclients and is - * present to preserve simplicity. Protected by state_lock. - * @need_suspend: when true, a suspend has been requested while a resume is - * in progress. Resume worker should queue a suspend. - * @need_resume: when true, a resume has been requested while a suspend is - * in progress. Suspend worker should queue a resume. + * struct kbase_vinstr_context - IOCTL interface for userspace hardware + * counters. + * @hvirt: Hardware counter virtualizer used by vinstr. + * @metadata: Hardware counter metadata provided by virtualizer. + * @lock: Lock protecting all vinstr state. + * @suspend_count: Suspend reference count. If non-zero, timer and worker are + * prevented from being re-scheduled. + * @client_count: Number of vinstr clients. + * @clients: List of vinstr clients. + * @dump_timer: Timer that enqueues dump_work to a workqueue. + * @dump_work: Worker for performing periodic counter dumps. 
*/ struct kbase_vinstr_context { - struct mutex lock; - struct kbase_device *kbdev; - struct kbase_context *kctx; - - struct kbase_vmap_struct vmap; - u64 gpu_va; - void *cpu_va; - size_t dump_size; - u32 bitmap[4]; - bool reprogram; - - enum vinstr_state state; - struct spinlock state_lock; - wait_queue_head_t suspend_waitq; - unsigned int suspend_cnt; - struct work_struct suspend_work; - struct work_struct resume_work; - - u32 nclients; - u32 nclients_suspended; - struct list_head waiting_clients; - struct list_head idle_clients; - struct list_head suspended_clients; - - struct task_struct *thread; - wait_queue_head_t waitq; - atomic_t request_pending; - - bool clients_present; - - bool need_suspend; - bool need_resume; + struct kbase_hwcnt_virtualizer *hvirt; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t suspend_count; + size_t client_count; + struct list_head clients; + struct hrtimer dump_timer; + struct work_struct dump_work; }; /** - * struct kbase_vinstr_client - a vinstr client attached to a vinstr context - * @vinstr_ctx: vinstr context client is attached to - * @list: node used to attach this client to list in vinstr context - * @buffer_count: number of buffers this client is using - * @event_mask: events this client reacts to - * @dump_size: size of one dump buffer in bytes - * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters - * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) - * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface) - * @accum_buffer: temporary accumulation buffer for preserving counters - * @dump_time: next time this clients shall request hwcnt dump - * @dump_interval: interval between periodic hwcnt dumps - * @dump_buffers: kernel hwcnt dump buffers allocated by this client - * @dump_buffers_meta: metadata of dump buffers - * @meta_idx: index of metadata being accessed by userspace - * @read_idx: index of buffer read by userspace - * @write_idx: index of buffer being written by dumping service - * @waitq: client's notification queue - * @pending: when true, client has attached but hwcnt not yet updated - * @suspended: when true, client is suspended + * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. + * @vctx: Vinstr context client is attached to. + * @hvcli: Hardware counter virtualizer client. + * @node: Node used to attach this client to list in vinstr + * context. + * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic + * client. + * @next_dump_time_ns: Time in ns when this client's next periodic dump must + * occur. If 0, not a periodic client. + * @enable_map: Counters enable map. + * @dump_bufs: Array of dump buffers allocated by this client. + * @dump_bufs_meta: Metadata of dump buffers. + * @meta_idx: Index of metadata being accessed by userspace. + * @read_idx: Index of buffer read by userspace. + * @write_idx: Index of buffer being written by dump worker. + * @waitq: Client's notification queue. 
*/ struct kbase_vinstr_client { - struct kbase_vinstr_context *vinstr_ctx; - struct list_head list; - unsigned int buffer_count; - u32 event_mask; - size_t dump_size; - u32 bitmap[4]; - void __user *legacy_buffer; - void *kernel_buffer; - void *accum_buffer; - u64 dump_time; - u32 dump_interval; - char *dump_buffers; - struct kbase_hwcnt_reader_metadata *dump_buffers_meta; - atomic_t meta_idx; - atomic_t read_idx; - atomic_t write_idx; - wait_queue_head_t waitq; - bool pending; - bool suspended; -}; - -/** - * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer - * @hrtimer: high resolution timer - * @vinstr_ctx: vinstr context - */ -struct kbasep_vinstr_wake_up_timer { - struct hrtimer hrtimer; - struct kbase_vinstr_context *vinstr_ctx; + struct kbase_vinstr_context *vctx; + struct kbase_hwcnt_virtualizer_client *hvcli; + struct list_head node; + u64 next_dump_time_ns; + u32 dump_interval_ns; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer_array dump_bufs; + struct kbase_hwcnt_reader_metadata *dump_bufs_meta; + atomic_t meta_idx; + atomic_t read_idx; + atomic_t write_idx; + wait_queue_head_t waitq; }; -/*****************************************************************************/ - -static void kbase_vinstr_update_suspend( - struct kbase_vinstr_context *vinstr_ctx); - -static int kbasep_vinstr_service_task(void *data); - static unsigned int kbasep_vinstr_hwcnt_reader_poll( - struct file *filp, - poll_table *wait); + struct file *filp, + poll_table *wait); + static long kbasep_vinstr_hwcnt_reader_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long arg); + struct file *filp, + unsigned int cmd, + unsigned long arg); + static int kbasep_vinstr_hwcnt_reader_mmap( - struct file *filp, - struct vm_area_struct *vma); + struct file *filp, + struct vm_area_struct *vma); + static int kbasep_vinstr_hwcnt_reader_release( - struct inode *inode, - struct file *filp); + struct inode *inode, + struct file *filp); -/* The timeline stream file operations structure. */ +/* Vinstr client file operations */ static const struct file_operations vinstr_client_fops = { .poll = kbasep_vinstr_hwcnt_reader_poll, .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, @@ -237,1426 +131,724 @@ static const struct file_operations vinstr_client_fops = { .release = kbasep_vinstr_hwcnt_reader_release, }; -/*****************************************************************************/ - -static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +/** + * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. + * + * Return: Current time in nanoseconds. + */ +static u64 kbasep_vinstr_timestamp_ns(void) { - struct kbase_context *kctx = vinstr_ctx->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_ioctl_hwcnt_enable enable; - int err; - - enable.dump_buffer = vinstr_ctx->gpu_va; - enable.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; - enable.tiler_bm = vinstr_ctx->bitmap[TILER_HWCNT_BM]; - enable.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; - enable.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; - - /* Mark the context as active so the GPU is kept turned on */ - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread. */ - kbase_pm_context_active(kbdev); - - /* Schedule the context in */ - kbasep_js_schedule_privileged_ctx(kbdev, kctx); - err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); - if (err) { - /* Release the context. 
This had its own Power Manager Active - * reference */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference */ - kbase_pm_context_idle(kbdev); - } + struct timespec ts; - return err; + getrawmonotonic(&ts); + return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; } -static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) +/** + * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. + * @cur_ts_ns: Current time in nanoseconds. + * @interval: Interval between dumps in nanoseconds. + * + * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump + * time that occurs after cur_ts_ns. + */ +static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) { - struct kbase_context *kctx = vinstr_ctx->kctx; - struct kbase_device *kbdev = kctx->kbdev; - int err; - - err = kbase_instr_hwcnt_disable_internal(kctx); - if (err) { - dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", - kctx); - return; - } + /* Non-periodic client */ + if (interval == 0) + return 0; - /* Release the context. This had its own Power Manager Active reference. */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference. */ - kbase_pm_context_idle(kbdev); - - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); + /* + * Return the next interval after the current time relative to t=0. + * This means multiple clients with the same period will synchronise, + * regardless of when they were started, allowing the worker to be + * scheduled less frequently. + */ + do_div(cur_ts_ns, interval); + return (cur_ts_ns + 1) * interval; } -static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) -{ - disable_hwcnt(vinstr_ctx); - return enable_hwcnt(vinstr_ctx); -} +/** + * kbasep_vinstr_client_dump() - Perform a dump for a client. + * @vcli: Non-NULL pointer to a vinstr client. + * @event_id: Event type that triggered the dump. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_vinstr_client_dump( + struct kbase_vinstr_client *vcli, + enum base_hwcnt_reader_event event_id) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + unsigned int write_idx; + unsigned int read_idx; + struct kbase_hwcnt_dump_buffer *dump_buf; + struct kbase_hwcnt_reader_metadata *meta; -static void hwcnt_bitmap_set(u32 dst[4], u32 src[4]) -{ - dst[JM_HWCNT_BM] = src[JM_HWCNT_BM]; - dst[TILER_HWCNT_BM] = src[TILER_HWCNT_BM]; - dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM]; - dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM]; -} + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); -static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) -{ - dst[JM_HWCNT_BM] |= src[JM_HWCNT_BM]; - dst[TILER_HWCNT_BM] |= src[TILER_HWCNT_BM]; - dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM]; - dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; -} + write_idx = atomic_read(&vcli->write_idx); + read_idx = atomic_read(&vcli->read_idx); -size_t kbase_vinstr_dump_size(struct kbase_device *kbdev) -{ - size_t dump_size; - -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { - u32 nr_cg; - - nr_cg = kbdev->gpu_props.num_core_groups; - dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - } else -#endif /* CONFIG_MALI_NO_MALI */ - { - /* assume v5 for now */ - base_gpu_props *props = &kbdev->gpu_props.props; - u32 nr_l2 = props->l2_props.num_l2_slices; - u64 core_mask = props->coherency_info.group[0].core_mask; - u32 nr_blocks = fls64(core_mask); - - /* JM and tiler counter blocks are always present */ - dump_size = (2 + nr_l2 + nr_blocks) * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - } - return dump_size; -} -KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size); + /* Check if there is a place to copy HWC block into. */ + if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) + return -EBUSY; + write_idx %= vcli->dump_bufs.buf_cnt; -static size_t kbasep_vinstr_dump_size_ctx( - struct kbase_vinstr_context *vinstr_ctx) -{ - return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev); -} + dump_buf = &vcli->dump_bufs.bufs[write_idx]; + meta = &vcli->dump_bufs_meta[write_idx]; -static int kbasep_vinstr_map_kernel_dump_buffer( - struct kbase_vinstr_context *vinstr_ctx) -{ - struct kbase_va_region *reg; - struct kbase_context *kctx = vinstr_ctx->kctx; - u64 flags, nr_pages; + errcode = kbase_hwcnt_virtualizer_client_dump( + vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf); + if (errcode) + return errcode; - flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; - vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); - nr_pages = PFN_UP(vinstr_ctx->dump_size); + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. + */ + kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map); - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, - &vinstr_ctx->gpu_va); - if (!reg) - return -ENOMEM; + /* Zero all non-enabled counters (current values are undefined) */ + kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); - vinstr_ctx->cpu_va = kbase_vmap( - kctx, - vinstr_ctx->gpu_va, - vinstr_ctx->dump_size, - &vinstr_ctx->vmap); - if (!vinstr_ctx->cpu_va) { - kbase_mem_free(kctx, vinstr_ctx->gpu_va); - return -ENOMEM; - } + meta->timestamp = ts_end_ns; + meta->event_id = event_id; + meta->buffer_idx = write_idx; + /* Notify client. Make sure all changes to memory are visible. 
*/ + wmb(); + atomic_inc(&vcli->write_idx); + wake_up_interruptible(&vcli->waitq); return 0; } -static void kbasep_vinstr_unmap_kernel_dump_buffer( - struct kbase_vinstr_context *vinstr_ctx) +/** + * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. + * @vcli: Non-NULL pointer to a vinstr client. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) { - struct kbase_context *kctx = vinstr_ctx->kctx; + u64 ts_start_ns; + u64 ts_end_ns; - kbase_vunmap(kctx, &vinstr_ctx->vmap); - kbase_mem_free(kctx, vinstr_ctx->gpu_va); + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); + + /* A virtualizer dump with a NULL buffer will just clear the virtualizer + * client's buffer. + */ + return kbase_hwcnt_virtualizer_client_dump( + vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); } /** - * kbasep_vinstr_create_kctx - create kernel context for vinstr - * @vinstr_ctx: vinstr context - * Return: zero on success + * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic + * vinstr clients, then reschedule the dump + * worker appropriately. + * @vctx: Non-NULL pointer to the vinstr context. + * + * If there are no periodic clients, then the dump worker will not be + * rescheduled. Else, the dump worker will be rescheduled for the next periodic + * client dump. */ -static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) +static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) { - struct kbase_device *kbdev = vinstr_ctx->kbdev; - struct kbasep_kctx_list_element *element = NULL; - unsigned long flags; - bool enable_backend = false; - int err; - - vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); - if (!vinstr_ctx->kctx) - return -ENOMEM; + u64 cur_ts_ns; + u64 earliest_next_ns = U64_MAX; + struct kbase_vinstr_client *pos; - /* Map the master kernel dump buffer. The HW dumps the counters - * into this memory region. */ - err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx); - if (err) - goto failed_map; - - /* Add kernel context to list of contexts associated with device. */ - element = kzalloc(sizeof(*element), GFP_KERNEL); - if (element) { - element->kctx = vinstr_ctx->kctx; - mutex_lock(&kbdev->kctx_list_lock); - list_add(&element->link, &kbdev->kctx_list); - - /* Inform timeline client about new context. - * Do this while holding the lock to avoid tracepoint - * being created in both body and summary stream. */ - KBASE_TLSTREAM_TL_NEW_CTX( - vinstr_ctx->kctx, - vinstr_ctx->kctx->id, - (u32)(vinstr_ctx->kctx->tgid)); - - mutex_unlock(&kbdev->kctx_list_lock); - } else { - /* Don't treat this as a fail - just warn about it. */ - dev_warn(kbdev->dev, - "couldn't add kctx to kctx_list\n"); - } + WARN_ON(!vctx); + lockdep_assert_held(&vctx->lock); - /* Don't enable hardware counters if vinstr is suspended. - * Note that vinstr resume code is run under vinstr context lock, - * lower layer will be enabled as needed on resume. 
*/ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE == vinstr_ctx->state) - enable_backend = true; - vinstr_ctx->clients_present = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (enable_backend) - err = enable_hwcnt(vinstr_ctx); - if (err) - goto failed_enable; - - vinstr_ctx->thread = kthread_run( - kbasep_vinstr_service_task, - vinstr_ctx, - "mali_vinstr_service"); - if (IS_ERR(vinstr_ctx->thread)) { - err = PTR_ERR(vinstr_ctx->thread); - goto failed_kthread; - } + cur_ts_ns = kbasep_vinstr_timestamp_ns(); - return 0; + /* + * Update each client's next dump time, and find the earliest next + * dump time if any of the clients have a non-zero interval. + */ + list_for_each_entry(pos, &vctx->clients, node) { + const u64 cli_next_ns = + kbasep_vinstr_next_dump_time_ns( + cur_ts_ns, pos->dump_interval_ns); + + /* Non-zero next dump time implies a periodic client */ + if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) + earliest_next_ns = cli_next_ns; -failed_kthread: - disable_hwcnt(vinstr_ctx); -failed_enable: - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->clients_present = false; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); - if (element) { - mutex_lock(&kbdev->kctx_list_lock); - list_del(&element->link); - kfree(element); - mutex_unlock(&kbdev->kctx_list_lock); - KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); + pos->next_dump_time_ns = cli_next_ns; } -failed_map: - kbase_destroy_context(vinstr_ctx->kctx); - vinstr_ctx->kctx = NULL; - return err; + + /* Cancel the timer if it is already pending */ + hrtimer_cancel(&vctx->dump_timer); + + /* Start the timer if there are periodic clients and vinstr is not + * suspended. + */ + if ((earliest_next_ns != U64_MAX) && + (vctx->suspend_count == 0) && + !WARN_ON(earliest_next_ns < cur_ts_ns)) + hrtimer_start( + &vctx->dump_timer, + ns_to_ktime(earliest_next_ns - cur_ts_ns), + HRTIMER_MODE_REL); } /** - * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context - * @vinstr_ctx: vinstr context + * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients + * that need to be dumped, then reschedules itself. + * @work: Work structure. */ -static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) +static void kbasep_vinstr_dump_worker(struct work_struct *work) { - struct kbase_device *kbdev = vinstr_ctx->kbdev; - struct kbasep_kctx_list_element *element; - struct kbasep_kctx_list_element *tmp; - bool found = false; - bool hwcnt_disabled = false; - unsigned long flags; - - /* Release hw counters dumping resources. */ - vinstr_ctx->thread = NULL; - - /* Simplify state transitions by specifying that we have no clients. */ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->clients_present = false; - if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state)) - hwcnt_disabled = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - if (!hwcnt_disabled) - disable_hwcnt(vinstr_ctx); - - kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); - - /* Remove kernel context from the device's contexts list. 
*/ - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { - if (element->kctx == vinstr_ctx->kctx) { - list_del(&element->link); - kfree(element); - found = true; - } - } - mutex_unlock(&kbdev->kctx_list_lock); + struct kbase_vinstr_context *vctx = + container_of(work, struct kbase_vinstr_context, dump_work); + struct kbase_vinstr_client *pos; + u64 cur_time_ns; - if (!found) - dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + mutex_lock(&vctx->lock); - /* Destroy context. */ - kbase_destroy_context(vinstr_ctx->kctx); + cur_time_ns = kbasep_vinstr_timestamp_ns(); - /* Inform timeline client about context destruction. */ - KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx); + /* Dump all periodic clients whose next dump time is before the current + * time. + */ + list_for_each_entry(pos, &vctx->clients, node) { + if ((pos->next_dump_time_ns != 0) && + (pos->next_dump_time_ns < cur_time_ns)) + kbasep_vinstr_client_dump( + pos, BASE_HWCNT_READER_EVENT_PERIODIC); + } + + /* Update the next dump times of all periodic clients, then reschedule + * this worker at the earliest next dump time. + */ + kbasep_vinstr_reschedule_worker(vctx); - vinstr_ctx->kctx = NULL; + mutex_unlock(&vctx->lock); } /** - * kbasep_vinstr_attach_client - Attach a client to the vinstr core - * @vinstr_ctx: vinstr context - * @buffer_count: requested number of dump buffers - * @bitmap: bitmaps describing which counters should be enabled - * @argp: pointer where notification descriptor shall be stored - * @kernel_buffer: pointer to kernel side buffer - * - * Return: vinstr opaque client handle or NULL on failure + * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for + * execution as soon as possible. + * @timer: Timer structure. */ -static struct kbase_vinstr_client *kbasep_vinstr_attach_client( - struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, - u32 bitmap[4], void *argp, void *kernel_buffer) +static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) { - struct task_struct *thread = NULL; - struct kbase_vinstr_client *cli; - unsigned long flags; - bool clients_present = false; + struct kbase_vinstr_context *vctx = + container_of(timer, struct kbase_vinstr_context, dump_timer); - KBASE_DEBUG_ASSERT(vinstr_ctx); + /* We don't need to check vctx->suspend_count here, as the suspend + * function will ensure that any worker enqueued here is immediately + * cancelled, and the worker itself won't reschedule this timer if + * suspend_count != 0. + */ +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &vctx->dump_work); +#else + queue_work(system_highpri_wq, &vctx->dump_work); +#endif + return HRTIMER_NORESTART; +} - if (buffer_count > MAX_BUFFER_COUNT - || (buffer_count & (buffer_count - 1))) - return NULL; +/** + * kbasep_vinstr_client_destroy() - Destroy a vinstr client. + * @vcli: vinstr client. Must not be attached to a vinstr context. 
+ */ +static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) +{ + if (!vcli) + return; - cli = kzalloc(sizeof(*cli), GFP_KERNEL); - if (!cli) - return NULL; + kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); + kfree(vcli->dump_bufs_meta); + kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs); + kbase_hwcnt_enable_map_free(&vcli->enable_map); + kfree(vcli); +} - cli->vinstr_ctx = vinstr_ctx; - cli->buffer_count = buffer_count; - cli->event_mask = - (1 << BASE_HWCNT_READER_EVENT_MANUAL) | - (1 << BASE_HWCNT_READER_EVENT_PERIODIC); - cli->pending = true; +/** + * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to + * the vinstr context. + * @vctx: Non-NULL pointer to vinstr context. + * @setup: Non-NULL pointer to hardware counter ioctl setup structure. + * setup->buffer_count must not be 0. + * @out_vcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_create( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup, + struct kbase_vinstr_client **out_vcli) +{ + int errcode; + struct kbase_vinstr_client *vcli; + struct kbase_hwcnt_physical_enable_map phys_em; - hwcnt_bitmap_set(cli->bitmap, bitmap); + WARN_ON(!vctx); + WARN_ON(!setup); + WARN_ON(setup->buffer_count == 0); - mutex_lock(&vinstr_ctx->lock); + vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); + if (!vcli) + return -ENOMEM; - hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap); - vinstr_ctx->reprogram = true; + vcli->vctx = vctx; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + errcode = kbase_hwcnt_enable_map_alloc( + vctx->metadata, &vcli->enable_map); + if (errcode) + goto error; - /* If this is the first client, create the vinstr kbase - * context. This context is permanently resident until the - * last client exits. */ - if (!clients_present) { - hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap); - if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0) - goto error; + phys_em.jm_bm = setup->jm_bm; + phys_em.shader_bm = setup->shader_bm; + phys_em.tiler_bm = setup->tiler_bm; + phys_em.mmu_l2_bm = setup->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); - vinstr_ctx->reprogram = false; - cli->pending = false; - } + errcode = kbase_hwcnt_dump_buffer_array_alloc( + vctx->metadata, setup->buffer_count, &vcli->dump_bufs); + if (errcode) + goto error; - /* The GPU resets the counter block every time there is a request - * to dump it. We need a per client kernel buffer for accumulating - * the counters. */ - cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); - cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); - if (!cli->accum_buffer) + errcode = -ENOMEM; + vcli->dump_bufs_meta = kmalloc_array( + setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); + if (!vcli->dump_bufs_meta) goto error; - /* Prepare buffers. */ - if (cli->buffer_count) { - int *fd = (int *)argp; - size_t tmp; - - /* Allocate area for buffers metadata storage. */ - tmp = sizeof(struct kbase_hwcnt_reader_metadata) * - cli->buffer_count; - cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL); - if (!cli->dump_buffers_meta) - goto error; - - /* Allocate required number of dumping buffers. 
*/ - cli->dump_buffers = (char *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(cli->dump_size * cli->buffer_count)); - if (!cli->dump_buffers) - goto error; - - /* Create descriptor for user-kernel data exchange. */ - *fd = anon_inode_getfd( - "[mali_vinstr_desc]", - &vinstr_client_fops, - cli, - O_RDONLY | O_CLOEXEC); - if (0 > *fd) - goto error; - } else if (kernel_buffer) { - cli->kernel_buffer = kernel_buffer; - } else { - cli->legacy_buffer = (void __user *)argp; - } + errcode = kbase_hwcnt_virtualizer_client_create( + vctx->hvirt, &vcli->enable_map, &vcli->hvcli); + if (errcode) + goto error; - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->meta_idx, 0); - atomic_set(&cli->write_idx, 0); - init_waitqueue_head(&cli->waitq); + init_waitqueue_head(&vcli->waitq); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->nclients++; - list_add(&cli->list, &vinstr_ctx->idle_clients); - kbase_vinstr_update_suspend(vinstr_ctx); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + *out_vcli = vcli; + return 0; +error: + kbasep_vinstr_client_destroy(vcli); + return errcode; +} - mutex_unlock(&vinstr_ctx->lock); +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx) +{ + struct kbase_vinstr_context *vctx; + const struct kbase_hwcnt_metadata *metadata; - return cli; + if (!hvirt || !out_vctx) + return -EINVAL; -error: - kfree(cli->dump_buffers_meta); - if (cli->dump_buffers) - free_pages( - (unsigned long)cli->dump_buffers, - get_order(cli->dump_size * cli->buffer_count)); - kfree(cli->accum_buffer); - if (!clients_present && vinstr_ctx->kctx) { - thread = vinstr_ctx->thread; - kbasep_vinstr_destroy_kctx(vinstr_ctx); - } - kfree(cli); + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + if (!metadata) + return -EINVAL; - mutex_unlock(&vinstr_ctx->lock); + vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); + if (!vctx) + return -ENOMEM; + + vctx->hvirt = hvirt; + vctx->metadata = metadata; - /* Thread must be stopped after lock is released. 
*/ - if (thread) - kthread_stop(thread); + mutex_init(&vctx->lock); + INIT_LIST_HEAD(&vctx->clients); + hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + vctx->dump_timer.function = kbasep_vinstr_dump_timer; + INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); - return NULL; + *out_vctx = vctx; + return 0; } -void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) +void kbase_vinstr_term(struct kbase_vinstr_context *vctx) { - struct kbase_vinstr_context *vinstr_ctx; - struct kbase_vinstr_client *iter, *tmp; - struct task_struct *thread = NULL; - u32 zerobitmap[4] = { 0 }; - int cli_found = 0; - unsigned long flags; - bool clients_present; - - KBASE_DEBUG_ASSERT(cli); - vinstr_ctx = cli->vinstr_ctx; - KBASE_DEBUG_ASSERT(vinstr_ctx); - - mutex_lock(&vinstr_ctx->lock); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) { - if (iter == cli) { - cli_found = 1; - break; - } - } - if (!cli_found) { - list_for_each_entry_safe( - iter, tmp, &vinstr_ctx->waiting_clients, list) { - if (iter == cli) { - cli_found = 1; - break; - } - } - } - if (!cli_found) { - list_for_each_entry_safe( - iter, tmp, &vinstr_ctx->suspended_clients, list) { - if (iter == cli) { - cli_found = 1; - break; - } + if (!vctx) + return; + + cancel_work_sync(&vctx->dump_work); + + /* Non-zero client count implies client leak */ + if (WARN_ON(vctx->client_count != 0)) { + struct kbase_vinstr_client *pos, *n; + + list_for_each_entry_safe(pos, n, &vctx->clients, node) { + list_del(&pos->node); + vctx->client_count--; + kbasep_vinstr_client_destroy(pos); } } - KBASE_DEBUG_ASSERT(cli_found); - if (cli_found) { - vinstr_ctx->reprogram = true; - list_del(&iter->list); - } + WARN_ON(vctx->client_count != 0); + kfree(vctx); +} - if (!cli->suspended) - vinstr_ctx->nclients--; - else - vinstr_ctx->nclients_suspended--; +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) +{ + if (WARN_ON(!vctx)) + return; - kbase_vinstr_update_suspend(vinstr_ctx); + mutex_lock(&vctx->lock); - clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended); + if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) + vctx->suspend_count++; - /* Rebuild context bitmap now that the client has detached */ - hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap); - list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); - list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list) - hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap); + mutex_unlock(&vctx->lock); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + /* Always sync cancel the timer and then the worker, regardless of the + * new suspend count. + * + * This ensures concurrent calls to kbase_vinstr_suspend() always block + * until vinstr is fully suspended. + * + * The timer is cancelled before the worker, as the timer + * unconditionally re-enqueues the worker, but the worker checks the + * suspend_count that we just incremented before rescheduling the timer. + * + * Therefore if we cancel the worker first, the timer might re-enqueue + * the worker before we cancel the timer, but the opposite is not + * possible. 
+ */ + hrtimer_cancel(&vctx->dump_timer); + cancel_work_sync(&vctx->dump_work); +} - kfree(cli->dump_buffers_meta); - free_pages( - (unsigned long)cli->dump_buffers, - get_order(cli->dump_size * cli->buffer_count)); - kfree(cli->accum_buffer); - kfree(cli); +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) +{ + if (WARN_ON(!vctx)) + return; - if (!clients_present) { - thread = vinstr_ctx->thread; - kbasep_vinstr_destroy_kctx(vinstr_ctx); - } + mutex_lock(&vctx->lock); - mutex_unlock(&vinstr_ctx->lock); + if (!WARN_ON(vctx->suspend_count == 0)) { + vctx->suspend_count--; - /* Thread must be stopped after lock is released. */ - if (thread) - kthread_stop(thread); -} -KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client); + /* Last resume, so re-enqueue the worker if we have any periodic + * clients. + */ + if (vctx->suspend_count == 0) { + struct kbase_vinstr_client *pos; + bool has_periodic_clients = false; -/* Accumulate counters in the dump buffer */ -static void accum_dump_buffer(void *dst, void *src, size_t dump_size) -{ - size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; - u32 *d = dst; - u32 *s = src; - size_t i, j; - - for (i = 0; i < dump_size; i += block_size) { - /* skip over the header block */ - d += NR_BYTES_PER_HDR / sizeof(u32); - s += NR_BYTES_PER_HDR / sizeof(u32); - for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) { - /* saturate result if addition would result in wraparound */ - if (U32_MAX - *d < *s) - *d = U32_MAX; - else - *d += *s; - d++; - s++; + list_for_each_entry(pos, &vctx->clients, node) { + if (pos->dump_interval_ns != 0) { + has_periodic_clients = true; + break; + } + } + + if (has_periodic_clients) +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &vctx->dump_work); +#else + queue_work(system_highpri_wq, &vctx->dump_work); +#endif } } -} -/* This is the Midgard v4 patch function. It copies the headers for each - * of the defined blocks from the master kernel buffer and then patches up - * the performance counter enable mask for each of the blocks to exclude - * counters that were not requested by the client. 
*/ -static void patch_dump_buffer_hdr_v4( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client *cli) -{ - u32 *mask; - u8 *dst = cli->accum_buffer; - u8 *src = vinstr_ctx->cpu_va; - u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups; - size_t i, group_size, group; - enum { - SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, - JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT - }; - - group_size = NR_CNT_BLOCKS_PER_GROUP * - NR_CNT_PER_BLOCK * - NR_BYTES_PER_CNT; - for (i = 0; i < nr_cg; i++) { - group = i * group_size; - /* copy shader core headers */ - memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE], - NR_BYTES_PER_HDR); - memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE], - NR_BYTES_PER_HDR); - memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE], - NR_BYTES_PER_HDR); - memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE], - NR_BYTES_PER_HDR); - - /* copy tiler header */ - memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE], - NR_BYTES_PER_HDR); - - /* copy mmu header */ - memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE], - NR_BYTES_PER_HDR); - - /* copy job manager header */ - memcpy(&dst[group + JM_BASE], &src[group + JM_BASE], - NR_BYTES_PER_HDR); - - /* patch the shader core enable mask */ - mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - - /* patch the tiler core enable mask */ - mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[TILER_HWCNT_BM]; - - /* patch the mmu core enable mask */ - mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; - - /* patch the job manager enable mask */ - mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[JM_HWCNT_BM]; - } + mutex_unlock(&vctx->lock); } -/* This is the Midgard v5 patch function. It copies the headers for each - * of the defined blocks from the master kernel buffer and then patches up - * the performance counter enable mask for each of the blocks to exclude - * counters that were not requested by the client. 
*/ -static void patch_dump_buffer_hdr_v5( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client *cli) +int kbase_vinstr_hwcnt_reader_setup( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup) { - struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; - u32 i, nr_l2; - u64 core_mask; - u32 *mask; - u8 *dst = cli->accum_buffer; - u8 *src = vinstr_ctx->cpu_va; - size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; - - /* copy and patch job manager header */ - memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[JM_HWCNT_BM]; - dst += block_size; - src += block_size; - - /* copy and patch tiler header */ - memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[TILER_HWCNT_BM]; - dst += block_size; - src += block_size; - - /* copy and patch MMU/L2C headers */ - nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices; - for (i = 0; i < nr_l2; i++) { - memcpy(dst, src, NR_BYTES_PER_HDR); - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; - dst += block_size; - src += block_size; - } + int errcode; + int fd; + struct kbase_vinstr_client *vcli = NULL; - /* copy and patch shader core headers */ - core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; - while (0ull != core_mask) { - memcpy(dst, src, NR_BYTES_PER_HDR); - if (0ull != (core_mask & 1ull)) { - /* if block is not reserved update header */ - mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; - *mask &= cli->bitmap[SHADER_HWCNT_BM]; - } - dst += block_size; - src += block_size; + if (!vctx || !setup || + (setup->buffer_count == 0) || + (setup->buffer_count > MAX_BUFFER_COUNT)) + return -EINVAL; - core_mask >>= 1; - } + errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); + if (errcode) + goto error; + + errcode = anon_inode_getfd( + "[mali_vinstr_desc]", + &vinstr_client_fops, + vcli, + O_RDONLY | O_CLOEXEC); + if (errcode < 0) + goto error; + + fd = errcode; + + /* Add the new client. No need to reschedule worker, as not periodic */ + mutex_lock(&vctx->lock); + + vctx->client_count++; + list_add(&vcli->node, &vctx->clients); + + mutex_unlock(&vctx->lock); + + return fd; +error: + kbasep_vinstr_client_destroy(vcli); + return errcode; } /** - * accum_clients - accumulate dumped hw counters for all known clients - * @vinstr_ctx: vinstr context + * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready + * buffers. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: Non-zero if client has at least one dumping buffer filled that was + * not notified to user yet. */ -static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) +static int kbasep_vinstr_hwcnt_reader_buffer_ready( + struct kbase_vinstr_client *cli) { - struct kbase_vinstr_client *iter; - int v4 = 0; - -#ifndef CONFIG_MALI_NO_MALI - v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); -#endif - - list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { - /* Don't bother accumulating clients whose hwcnt requests - * have not yet been honoured. */ - if (iter->pending) - continue; - if (v4) - patch_dump_buffer_hdr_v4(vinstr_ctx, iter); - else - patch_dump_buffer_hdr_v5(vinstr_ctx, iter); - accum_dump_buffer( - iter->accum_buffer, - vinstr_ctx->cpu_va, - iter->dump_size); - } - list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) { - /* Don't bother accumulating clients whose hwcnt requests - * have not yet been honoured. 
*/ - if (iter->pending) - continue; - if (v4) - patch_dump_buffer_hdr_v4(vinstr_ctx, iter); - else - patch_dump_buffer_hdr_v5(vinstr_ctx, iter); - accum_dump_buffer( - iter->accum_buffer, - vinstr_ctx->cpu_va, - iter->dump_size); - } + WARN_ON(!cli); + return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); } -/*****************************************************************************/ - /** - * kbasep_vinstr_get_timestamp - return timestamp + * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. + * @cli: Non-NULL pointer to vinstr client. * - * Function returns timestamp value based on raw monotonic timer. Value will - * wrap around zero in case of overflow. - * - * Return: timestamp value + * Return: 0 on success, else error code. */ -static u64 kbasep_vinstr_get_timestamp(void) +static long kbasep_vinstr_hwcnt_reader_ioctl_dump( + struct kbase_vinstr_client *cli) { - struct timespec ts; + int errcode; - getrawmonotonic(&ts); - return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; + mutex_lock(&cli->vctx->lock); + + errcode = kbasep_vinstr_client_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL); + + mutex_unlock(&cli->vctx->lock); + return errcode; } /** - * kbasep_vinstr_add_dump_request - register client's dumping request - * @cli: requesting client - * @waiting_clients: list of pending dumping requests + * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: 0 on success, else error code. */ -static void kbasep_vinstr_add_dump_request( - struct kbase_vinstr_client *cli, - struct list_head *waiting_clients) +static long kbasep_vinstr_hwcnt_reader_ioctl_clear( + struct kbase_vinstr_client *cli) { - struct kbase_vinstr_client *tmp; + int errcode; - if (list_empty(waiting_clients)) { - list_add(&cli->list, waiting_clients); - return; - } - list_for_each_entry(tmp, waiting_clients, list) { - if (tmp->dump_time > cli->dump_time) { - list_add_tail(&cli->list, &tmp->list); - return; - } - } - list_add_tail(&cli->list, waiting_clients); + mutex_lock(&cli->vctx->lock); + + errcode = kbasep_vinstr_client_clear(cli); + + mutex_unlock(&cli->vctx->lock); + return errcode; } /** - * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level - * dump and accumulate them for known - * clients - * @vinstr_ctx: vinstr context - * @timestamp: pointer where collection timestamp will be recorded + * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. * - * Return: zero on success + * Return: 0 on success, else error code. */ -static int kbasep_vinstr_collect_and_accumulate( - struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) +static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) { - unsigned long flags; - int rcode; + unsigned int meta_idx = atomic_read(&cli->meta_idx); + unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; -#ifdef CONFIG_MALI_NO_MALI - /* The dummy model needs the CPU mapping. */ - gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); -#endif + struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; + + /* Metadata sanity check. 
*/ + WARN_ON(idx != meta->buffer_idx); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE != vinstr_ctx->state) { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + if (sizeof(struct kbase_hwcnt_reader_metadata) != size) + return -EINVAL; + + /* Check if there is any buffer available. */ + if (atomic_read(&cli->write_idx) == meta_idx) return -EAGAIN; - } else { - vinstr_ctx->state = VINSTR_DUMPING; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - /* Request HW counters dump. - * Disable preemption to make dump timestamp more accurate. */ - preempt_disable(); - *timestamp = kbasep_vinstr_get_timestamp(); - rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx); - preempt_enable(); - - if (!rcode) - rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); - WARN_ON(rcode); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDING: - schedule_work(&vinstr_ctx->suspend_work); - break; - case VINSTR_DUMPING: - vinstr_ctx->state = VINSTR_IDLE; - wake_up_all(&vinstr_ctx->suspend_waitq); - break; - default: - break; - } - /* Accumulate values of collected counters. */ - if (!rcode) - accum_clients(vinstr_ctx); + /* Check if previously taken buffer was put back. */ + if (atomic_read(&cli->read_idx) != meta_idx) + return -EBUSY; + + /* Copy next available buffer's metadata to user. */ + if (copy_to_user(buffer, meta, size)) + return -EFAULT; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + atomic_inc(&cli->meta_idx); - return rcode; + return 0; } /** - * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel - * buffer - * @cli: requesting client - * @timestamp: timestamp when counters were collected - * @event_id: id of event that caused triggered counters collection + * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. * - * Return: zero on success + * Return: 0 on success, else error code. */ -static int kbasep_vinstr_fill_dump_buffer( - struct kbase_vinstr_client *cli, u64 timestamp, - enum base_hwcnt_reader_event event_id) +static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) { - unsigned int write_idx = atomic_read(&cli->write_idx); - unsigned int read_idx = atomic_read(&cli->read_idx); + unsigned int read_idx = atomic_read(&cli->read_idx); + unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; - struct kbase_hwcnt_reader_metadata *meta; - void *buffer; + struct kbase_hwcnt_reader_metadata meta; + + if (sizeof(struct kbase_hwcnt_reader_metadata) != size) + return -EINVAL; + + /* Check if any buffer was taken. */ + if (atomic_read(&cli->meta_idx) == read_idx) + return -EPERM; + + /* Check if correct buffer is put back. */ + if (copy_from_user(&meta, buffer, size)) + return -EFAULT; + if (idx != meta.buffer_idx) + return -EINVAL; + + atomic_inc(&cli->read_idx); - /* Check if there is a place to copy HWC block into. */ - if (write_idx - read_idx == cli->buffer_count) - return -1; - write_idx %= cli->buffer_count; - - /* Fill in dump buffer and its metadata. 
*/ - buffer = &cli->dump_buffers[write_idx * cli->dump_size]; - meta = &cli->dump_buffers_meta[write_idx]; - meta->timestamp = timestamp; - meta->event_id = event_id; - meta->buffer_idx = write_idx; - memcpy(buffer, cli->accum_buffer, cli->dump_size); return 0; } /** - * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer - * allocated in userspace - * @cli: requesting client + * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @interval: Periodic dumping interval (disable periodic dumping if 0). * - * Return: zero on success - * - * This is part of legacy ioctl interface. + * Return: 0 always. */ -static int kbasep_vinstr_fill_dump_buffer_legacy( - struct kbase_vinstr_client *cli) -{ - void __user *buffer = cli->legacy_buffer; - int rcode; +static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + struct kbase_vinstr_client *cli, + u32 interval) +{ + mutex_lock(&cli->vctx->lock); + + if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) + interval = DUMP_INTERVAL_MIN_NS; + /* Update the interval, and put in a dummy next dump time */ + cli->dump_interval_ns = interval; + cli->next_dump_time_ns = 0; + + /* + * If it's a periodic client, kick off the worker early to do a proper + * timer reschedule. Return value is ignored, as we don't care if the + * worker is already queued. + */ + if ((interval != 0) && (cli->vctx->suspend_count == 0)) +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &cli->vctx->dump_work); +#else + queue_work(system_highpri_wq, &cli->vctx->dump_work); +#endif + + mutex_unlock(&cli->vctx->lock); - /* Copy data to user buffer. */ - rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size); - if (rcode) { - pr_warn("error while copying buffer to user\n"); - return -EFAULT; - } return 0; } /** - * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer - * allocated in kernel space - * @cli: requesting client - * - * Return: zero on success + * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @event_id: ID of event to enable. * - * This is part of the kernel client interface. + * Return: 0 always. */ -static int kbasep_vinstr_fill_dump_buffer_kernel( - struct kbase_vinstr_client *cli) +static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) { - memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size); - + /* No-op, as events aren't supported */ return 0; } /** - * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst - * @vinstr_ctx: vinstr context + * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl + * command. + * @cli: Non-NULL pointer to vinstr client. + * @event_id: ID of event to disable. + * + * Return: 0 always. */ -static void kbasep_vinstr_reprogram( - struct kbase_vinstr_context *vinstr_ctx) +static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) { - unsigned long flags; - bool suspended = false; - - /* Don't enable hardware counters if vinstr is suspended. */ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE != vinstr_ctx->state) - suspended = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (suspended) - return; - - /* Change to suspended state is done while holding vinstr context - * lock. 
Below code will then no re-enable the instrumentation. */ - - if (vinstr_ctx->reprogram) { - struct kbase_vinstr_client *iter; - - if (!reprogram_hwcnt(vinstr_ctx)) { - vinstr_ctx->reprogram = false; - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - list_for_each_entry( - iter, - &vinstr_ctx->idle_clients, - list) - iter->pending = false; - list_for_each_entry( - iter, - &vinstr_ctx->waiting_clients, - list) - iter->pending = false; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - } - } + /* No-op, as events aren't supported */ + return 0; } /** - * kbasep_vinstr_update_client - copy accumulated counters to user readable - * buffer and notify the user - * @cli: requesting client - * @timestamp: timestamp when counters were collected - * @event_id: id of event that caused triggered counters collection + * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @hwver: Non-NULL pointer to user buffer where HW version will be stored. * - * Return: zero on success + * Return: 0 on success, else error code. */ -static int kbasep_vinstr_update_client( - struct kbase_vinstr_client *cli, u64 timestamp, - enum base_hwcnt_reader_event event_id) +static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + struct kbase_vinstr_client *cli, + u32 __user *hwver) { - int rcode = 0; - unsigned long flags; - - /* Copy collected counters to user readable buffer. */ - if (cli->buffer_count) - rcode = kbasep_vinstr_fill_dump_buffer( - cli, timestamp, event_id); - else if (cli->kernel_buffer) - rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli); - else - rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); - - /* Prepare for next request. */ - memset(cli->accum_buffer, 0, cli->dump_size); - - spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags); - /* Check if client was put to suspend state while it was being updated */ - if (cli->suspended) - rcode = -EINVAL; - spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags); + u32 ver = 0; + const enum kbase_hwcnt_gpu_group_type type = + kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); - if (rcode) - goto exit; - - /* Notify client. Make sure all changes to memory are visible. */ - wmb(); - atomic_inc(&cli->write_idx); - wake_up_interruptible(&cli->waitq); + switch (type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + ver = 4; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + ver = 5; + break; + default: + WARN_ON(true); + } -exit: - return rcode; + if (ver != 0) { + return put_user(ver, hwver); + } else { + return -EINVAL; + } } /** - * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function + * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. + * @filp: Non-NULL pointer to file structure. + * @cmd: User command. + * @arg: Command's argument. * - * @hrtimer: high resolution timer - * - * Return: High resolution timer restart enum. + * Return: 0 on success, else error code. 
*/ -static enum hrtimer_restart kbasep_vinstr_wake_up_callback( - struct hrtimer *hrtimer) +static long kbasep_vinstr_hwcnt_reader_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg) { - struct kbasep_vinstr_wake_up_timer *timer = - container_of( - hrtimer, - struct kbasep_vinstr_wake_up_timer, - hrtimer); - - KBASE_DEBUG_ASSERT(timer); - - atomic_set(&timer->vinstr_ctx->request_pending, 1); - wake_up_all(&timer->vinstr_ctx->waitq); - - return HRTIMER_NORESTART; -} - -/** - * kbasep_vinstr_service_task - HWC dumping service thread - * - * @data: Pointer to vinstr context structure. - * - * Return: 0 on success; -ENOMEM if timer allocation fails - */ -static int kbasep_vinstr_service_task(void *data) -{ - struct kbase_vinstr_context *vinstr_ctx = data; - struct kbasep_vinstr_wake_up_timer *timer; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - timer = kmalloc(sizeof(*timer), GFP_KERNEL); - - if (!timer) { - dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n"); - return -ENOMEM; - } - - hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - - timer->hrtimer.function = kbasep_vinstr_wake_up_callback; - timer->vinstr_ctx = vinstr_ctx; - - while (!kthread_should_stop()) { - struct kbase_vinstr_client *cli = NULL; - struct kbase_vinstr_client *tmp; - int rcode; - unsigned long flags; - - u64 timestamp = kbasep_vinstr_get_timestamp(); - u64 dump_time = 0; - struct list_head expired_requests; - - /* Hold lock while performing operations on lists of clients. */ - mutex_lock(&vinstr_ctx->lock); - - /* Closing thread must not interact with client requests. */ - if (current == vinstr_ctx->thread) { - atomic_set(&vinstr_ctx->request_pending, 0); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (!list_empty(&vinstr_ctx->waiting_clients)) { - cli = list_first_entry( - &vinstr_ctx->waiting_clients, - struct kbase_vinstr_client, - list); - dump_time = cli->dump_time; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - } - - if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) { - mutex_unlock(&vinstr_ctx->lock); - - /* Sleep until next dumping event or service request. */ - if (cli) { - u64 diff = dump_time - timestamp; - - hrtimer_start( - &timer->hrtimer, - ns_to_ktime(diff), - HRTIMER_MODE_REL); - } - wait_event( - vinstr_ctx->waitq, - atomic_read( - &vinstr_ctx->request_pending) || - kthread_should_stop()); - hrtimer_cancel(&timer->hrtimer); - continue; - } - - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, - ×tamp); - - INIT_LIST_HEAD(&expired_requests); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - /* Find all expired requests. */ - list_for_each_entry_safe( - cli, - tmp, - &vinstr_ctx->waiting_clients, - list) { - s64 tdiff = - (s64)(timestamp + DUMPING_RESOLUTION) - - (s64)cli->dump_time; - if (tdiff >= 0ll) { - list_del(&cli->list); - list_add(&cli->list, &expired_requests); - } else { - break; - } - } - - /* Fill data for each request found. */ - while (!list_empty(&expired_requests)) { - cli = list_first_entry(&expired_requests, - struct kbase_vinstr_client, list); - - /* Ensure that legacy buffer will not be used from - * this kthread context. */ - BUG_ON(0 == cli->buffer_count); - /* Expect only periodically sampled clients. */ - BUG_ON(0 == cli->dump_interval); - - /* Release the spinlock, as filling the data in client's - * userspace buffer could result in page faults. 
*/ - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (!rcode) - kbasep_vinstr_update_client( - cli, - timestamp, - BASE_HWCNT_READER_EVENT_PERIODIC); - spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags); - - /* This client got suspended, move to the next one. */ - if (cli->suspended) - continue; - - /* Set new dumping time. Drop missed probing times. */ - do { - cli->dump_time += cli->dump_interval; - } while (cli->dump_time < timestamp); - - list_del(&cli->list); - kbasep_vinstr_add_dump_request( - cli, - &vinstr_ctx->waiting_clients); - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - /* Reprogram counters set if required. */ - kbasep_vinstr_reprogram(vinstr_ctx); - - mutex_unlock(&vinstr_ctx->lock); - } - - kfree(timer); - - return 0; -} - -/*****************************************************************************/ - -/** - * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers - * @cli: pointer to vinstr client structure - * - * Return: non-zero if client has at least one dumping buffer filled that was - * not notified to user yet - */ -static int kbasep_vinstr_hwcnt_reader_buffer_ready( - struct kbase_vinstr_client *cli) -{ - KBASE_DEBUG_ASSERT(cli); - return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @buffer: pointer to userspace buffer - * @size: size of buffer - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - struct kbase_vinstr_client *cli, void __user *buffer, - size_t size) -{ - unsigned int meta_idx = atomic_read(&cli->meta_idx); - unsigned int idx = meta_idx % cli->buffer_count; - - struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx]; - - /* Metadata sanity check. */ - KBASE_DEBUG_ASSERT(idx == meta->buffer_idx); - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - - /* Check if there is any buffer available. */ - if (atomic_read(&cli->write_idx) == meta_idx) - return -EAGAIN; - - /* Check if previously taken buffer was put back. */ - if (atomic_read(&cli->read_idx) != meta_idx) - return -EBUSY; - - /* Copy next available buffer's metadata to user. */ - if (copy_to_user(buffer, meta, size)) - return -EFAULT; - - atomic_inc(&cli->meta_idx); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @buffer: pointer to userspace buffer - * @size: size of buffer - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - struct kbase_vinstr_client *cli, void __user *buffer, - size_t size) -{ - unsigned int read_idx = atomic_read(&cli->read_idx); - unsigned int idx = read_idx % cli->buffer_count; - - struct kbase_hwcnt_reader_metadata meta; - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - - /* Check if any buffer was taken. */ - if (atomic_read(&cli->meta_idx) == read_idx) - return -EPERM; - - /* Check if correct buffer is put back. 
*/ - if (copy_from_user(&meta, buffer, size)) - return -EFAULT; - if (idx != meta.buffer_idx) - return -EINVAL; - - atomic_inc(&cli->read_idx); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @interval: periodic dumping interval (disable periodic dumping if zero) - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - struct kbase_vinstr_client *cli, u32 interval) -{ - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - unsigned long flags; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - mutex_lock(&vinstr_ctx->lock); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - if (cli->suspended) { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - mutex_unlock(&vinstr_ctx->lock); - return -ENOMEM; - } - - list_del(&cli->list); - - cli->dump_interval = interval; - - /* If interval is non-zero, enable periodic dumping for this client. */ - if (cli->dump_interval) { - if (DUMPING_RESOLUTION > cli->dump_interval) - cli->dump_interval = DUMPING_RESOLUTION; - cli->dump_time = - kbasep_vinstr_get_timestamp() + cli->dump_interval; - - kbasep_vinstr_add_dump_request( - cli, &vinstr_ctx->waiting_clients); - - atomic_set(&vinstr_ctx->request_pending, 1); - wake_up_all(&vinstr_ctx->waitq); - } else { - list_add(&cli->list, &vinstr_ctx->idle_clients); - } - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - mutex_unlock(&vinstr_ctx->lock); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id - * @event_id: id of event - * Return: event_mask or zero if event is not supported or maskable - */ -static u32 kbasep_vinstr_hwcnt_reader_event_mask( - enum base_hwcnt_reader_event event_id) -{ - u32 event_mask = 0; - - switch (event_id) { - case BASE_HWCNT_READER_EVENT_PREJOB: - case BASE_HWCNT_READER_EVENT_POSTJOB: - /* These event are maskable. */ - event_mask = (1 << event_id); - break; - - case BASE_HWCNT_READER_EVENT_MANUAL: - case BASE_HWCNT_READER_EVENT_PERIODIC: - /* These event are non-maskable. */ - default: - /* These event are not supported. 
*/ - break; - } - - return event_mask; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @event_id: id of event to enable - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - u32 event_mask; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); - if (!event_mask) - return -EINVAL; - - mutex_lock(&vinstr_ctx->lock); - cli->event_mask |= event_mask; - mutex_unlock(&vinstr_ctx->lock); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @event_id: id of event to disable - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - u32 event_mask; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id); - if (!event_mask) - return -EINVAL; - - mutex_lock(&vinstr_ctx->lock); - cli->event_mask &= ~event_mask; - mutex_unlock(&vinstr_ctx->lock); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command - * @cli: pointer to vinstr client structure - * @hwver: pointer to user buffer where hw version will be stored - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - struct kbase_vinstr_client *cli, u32 __user *hwver) -{ -#ifndef CONFIG_MALI_NO_MALI - struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; -#endif - - u32 ver = 5; - -#ifndef CONFIG_MALI_NO_MALI - KBASE_DEBUG_ASSERT(vinstr_ctx); - if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) - ver = 4; -#endif - - return put_user(ver, hwver); -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl - * @filp: pointer to file structure - * @cmd: user command - * @arg: command's argument - * - * Return: zero on success - */ -static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, - unsigned int cmd, unsigned long arg) -{ - long rcode = 0; + long rcode; struct kbase_vinstr_client *cli; - KBASE_DEBUG_ASSERT(filp); + if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) + return -EINVAL; cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); - - if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd))) + if (!cli) return -EINVAL; switch (cmd) { @@ -1665,42 +857,41 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, break; case KBASE_HWCNT_READER_GET_HWVER: rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - cli, (u32 __user *)arg); + cli, (u32 __user *)arg); break; case KBASE_HWCNT_READER_GET_BUFFER_SIZE: - KBASE_DEBUG_ASSERT(cli->vinstr_ctx); rcode = put_user( - (u32)cli->vinstr_ctx->dump_size, - (u32 __user *)arg); + (u32)cli->vctx->metadata->dump_buf_bytes, + (u32 __user *)arg); break; case KBASE_HWCNT_READER_DUMP: - rcode = kbase_vinstr_hwc_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL); + rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); break; case KBASE_HWCNT_READER_CLEAR: - rcode = kbase_vinstr_hwc_clear(cli); + rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); break; case KBASE_HWCNT_READER_GET_BUFFER: rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - cli, (void 
__user *)arg, _IOC_SIZE(cmd)); + cli, (void __user *)arg, _IOC_SIZE(cmd)); break; case KBASE_HWCNT_READER_PUT_BUFFER: rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); + cli, (void __user *)arg, _IOC_SIZE(cmd)); break; case KBASE_HWCNT_READER_SET_INTERVAL: rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - cli, (u32)arg); + cli, (u32)arg); break; case KBASE_HWCNT_READER_ENABLE_EVENT: rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - cli, (enum base_hwcnt_reader_event)arg); + cli, (enum base_hwcnt_reader_event)arg); break; case KBASE_HWCNT_READER_DISABLE_EVENT: rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - cli, (enum base_hwcnt_reader_event)arg); + cli, (enum base_hwcnt_reader_event)arg); break; default: + WARN_ON(true); rcode = -EINVAL; break; } @@ -1709,21 +900,25 @@ static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp, } /** - * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll - * @filp: pointer to file structure - * @wait: pointer to poll table - * Return: POLLIN if data can be read without blocking, otherwise zero + * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. + * @filp: Non-NULL pointer to file structure. + * @wait: Non-NULL pointer to poll table. + * + * Return: POLLIN if data can be read without blocking, 0 if data can not be + * read without blocking, else error code. */ -static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, - poll_table *wait) +static unsigned int kbasep_vinstr_hwcnt_reader_poll( + struct file *filp, + poll_table *wait) { struct kbase_vinstr_client *cli; - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(wait); + if (!filp || !wait) + return -EINVAL; cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); + if (!cli) + return -EINVAL; poll_wait(filp, &cli->waitq, wait); if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) @@ -1732,25 +927,28 @@ static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp, } /** - * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap - * @filp: pointer to file structure - * @vma: pointer to vma structure - * Return: zero on success + * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. + * @filp: Non-NULL pointer to file structure. + * @vma: Non-NULL pointer to vma structure. + * + * Return: 0 on success, else error code. 
*/ -static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, - struct vm_area_struct *vma) +static int kbasep_vinstr_hwcnt_reader_mmap( + struct file *filp, + struct vm_area_struct *vma) { struct kbase_vinstr_client *cli; - unsigned long size, addr, pfn, offset; - unsigned long vm_size = vma->vm_end - vma->vm_start; + unsigned long vm_size, size, addr, pfn, offset; - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(vma); + if (!filp || !vma) + return -EINVAL; cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); + if (!cli) + return -EINVAL; - size = cli->buffer_count * cli->dump_size; + vm_size = vma->vm_end - vma->vm_start; + size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; @@ -1759,577 +957,33 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, if (vm_size > size - offset) return -EINVAL; - addr = __pa((unsigned long)cli->dump_buffers + offset); + addr = __pa(cli->dump_bufs.page_addr + offset); pfn = addr >> PAGE_SHIFT; return remap_pfn_range( - vma, - vma->vm_start, - pfn, - vm_size, - vma->vm_page_prot); + vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); } /** - * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release - * @inode: pointer to inode structure - * @filp: pointer to file structure - * Return always return zero + * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. + * @inode: Non-NULL pointer to inode structure. + * @filp: Non-NULL pointer to file structure. + * + * Return: 0 always. */ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, - struct file *filp) -{ - struct kbase_vinstr_client *cli; - - KBASE_DEBUG_ASSERT(inode); - KBASE_DEBUG_ASSERT(filp); - - cli = filp->private_data; - KBASE_DEBUG_ASSERT(cli); - - kbase_vinstr_detach_client(cli); - return 0; -} - -/*****************************************************************************/ - -/** - * kbasep_vinstr_kick_scheduler - trigger scheduler cycle - * @kbdev: pointer to kbase device structure - */ -static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - unsigned long flags; - - down(&js_devdata->schedule_sem); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_backend_slot_update(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - up(&js_devdata->schedule_sem); -} - -/** - * kbasep_vinstr_suspend_worker - worker suspending vinstr module - * @data: pointer to work structure - */ -static void kbasep_vinstr_suspend_worker(struct work_struct *data) -{ - struct kbase_vinstr_context *vinstr_ctx; - unsigned long flags; - - vinstr_ctx = container_of(data, struct kbase_vinstr_context, - suspend_work); - - mutex_lock(&vinstr_ctx->lock); - - if (vinstr_ctx->kctx) - disable_hwcnt(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->state = VINSTR_SUSPENDED; - wake_up_all(&vinstr_ctx->suspend_waitq); - - if (vinstr_ctx->need_resume) { - vinstr_ctx->need_resume = false; - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - } else { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - - /* Kick GPU scheduler to allow entering protected mode. - * This must happen after vinstr was suspended. 
- */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); - } -} - -/** - * kbasep_vinstr_resume_worker - worker resuming vinstr module - * @data: pointer to work structure - */ -static void kbasep_vinstr_resume_worker(struct work_struct *data) -{ - struct kbase_vinstr_context *vinstr_ctx; - unsigned long flags; - - vinstr_ctx = container_of(data, struct kbase_vinstr_context, - resume_work); - - mutex_lock(&vinstr_ctx->lock); - - if (vinstr_ctx->kctx) - enable_hwcnt(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->state = VINSTR_IDLE; - wake_up_all(&vinstr_ctx->suspend_waitq); - - if (vinstr_ctx->need_suspend) { - vinstr_ctx->need_suspend = false; - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - } else { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - - /* Kick GPU scheduler to allow entering protected mode. - * Note that scheduler state machine might requested re-entry to - * protected mode before vinstr was resumed. - * This must happen after vinstr was release. - */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); - } -} - -/*****************************************************************************/ - -struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) -{ - struct kbase_vinstr_context *vinstr_ctx; - - vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL); - if (!vinstr_ctx) - return NULL; - - INIT_LIST_HEAD(&vinstr_ctx->idle_clients); - INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); - INIT_LIST_HEAD(&vinstr_ctx->suspended_clients); - mutex_init(&vinstr_ctx->lock); - spin_lock_init(&vinstr_ctx->state_lock); - vinstr_ctx->kbdev = kbdev; - vinstr_ctx->thread = NULL; - vinstr_ctx->state = VINSTR_IDLE; - vinstr_ctx->suspend_cnt = 0; - INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); - INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); - init_waitqueue_head(&vinstr_ctx->suspend_waitq); - - atomic_set(&vinstr_ctx->request_pending, 0); - init_waitqueue_head(&vinstr_ctx->waitq); - - return vinstr_ctx; -} - -void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) -{ - struct kbase_vinstr_client *cli; - - /* Stop service thread first. */ - if (vinstr_ctx->thread) - kthread_stop(vinstr_ctx->thread); - - /* Wait for workers. 
*/ - flush_work(&vinstr_ctx->suspend_work); - flush_work(&vinstr_ctx->resume_work); - - while (1) { - struct list_head *list = &vinstr_ctx->idle_clients; - - if (list_empty(list)) { - list = &vinstr_ctx->waiting_clients; - if (list_empty(list)) { - list = &vinstr_ctx->suspended_clients; - if (list_empty(list)) - break; - } - } - - cli = list_first_entry(list, struct kbase_vinstr_client, list); - list_del(&cli->list); - if (!cli->suspended) - vinstr_ctx->nclients--; - else - vinstr_ctx->nclients_suspended--; - kfree(cli->accum_buffer); - kfree(cli); - } - KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients); - KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended); - if (vinstr_ctx->kctx) - kbasep_vinstr_destroy_kctx(vinstr_ctx); - kfree(vinstr_ctx); -} - -int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup) -{ - struct kbase_vinstr_client *cli; - u32 bitmap[4]; - int fd; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(setup); - KBASE_DEBUG_ASSERT(setup->buffer_count); - - bitmap[SHADER_HWCNT_BM] = setup->shader_bm; - bitmap[TILER_HWCNT_BM] = setup->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = setup->jm_bm; - - cli = kbasep_vinstr_attach_client( - vinstr_ctx, - setup->buffer_count, - bitmap, - &fd, - NULL); - - if (!cli) - return -ENOMEM; - - kbase_vinstr_wait_for_ready(vinstr_ctx); - return fd; -} - -int kbase_vinstr_legacy_hwc_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client **cli, - struct kbase_ioctl_hwcnt_enable *enable) + struct file *filp) { - KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(enable); - KBASE_DEBUG_ASSERT(cli); - - if (enable->dump_buffer) { - u32 bitmap[4]; + struct kbase_vinstr_client *vcli = filp->private_data; - bitmap[SHADER_HWCNT_BM] = enable->shader_bm; - bitmap[TILER_HWCNT_BM] = enable->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = enable->jm_bm; + mutex_lock(&vcli->vctx->lock); - if (*cli) - return -EBUSY; + vcli->vctx->client_count--; + list_del(&vcli->node); - *cli = kbasep_vinstr_attach_client( - vinstr_ctx, - 0, - bitmap, - (void *)(uintptr_t)enable->dump_buffer, - NULL); + mutex_unlock(&vcli->vctx->lock); - if (!(*cli)) - return -ENOMEM; - - kbase_vinstr_wait_for_ready(vinstr_ctx); - } else { - if (!*cli) - return -EINVAL; - - kbase_vinstr_detach_client(*cli); - *cli = NULL; - } + kbasep_vinstr_client_destroy(vcli); return 0; } - -struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup, - void *kernel_buffer) -{ - struct kbase_vinstr_client *kernel_client; - u32 bitmap[4]; - - if (!vinstr_ctx || !setup || !kernel_buffer) - return NULL; - - bitmap[SHADER_HWCNT_BM] = setup->shader_bm; - bitmap[TILER_HWCNT_BM] = setup->tiler_bm; - bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; - bitmap[JM_HWCNT_BM] = setup->jm_bm; - - kernel_client = kbasep_vinstr_attach_client( - vinstr_ctx, - 0, - bitmap, - NULL, - kernel_buffer); - - if (kernel_client) - kbase_vinstr_wait_for_ready(vinstr_ctx); - - return kernel_client; -} -KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); - -int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - int rcode = 0; - struct kbase_vinstr_context *vinstr_ctx; - u64 timestamp; - u32 event_mask; - - if (!cli) - return -EINVAL; - - vinstr_ctx = cli->vinstr_ctx; - KBASE_DEBUG_ASSERT(vinstr_ctx); - - KBASE_DEBUG_ASSERT(event_id < 
BASE_HWCNT_READER_EVENT_COUNT); - event_mask = 1 << event_id; - - mutex_lock(&vinstr_ctx->lock); - - if (event_mask & cli->event_mask) { - rcode = kbasep_vinstr_collect_and_accumulate( - vinstr_ctx, - ×tamp); - if (rcode) - goto exit; - - rcode = kbasep_vinstr_update_client(cli, timestamp, event_id); - if (rcode) - goto exit; - - kbasep_vinstr_reprogram(vinstr_ctx); - } - -exit: - mutex_unlock(&vinstr_ctx->lock); - - return rcode; -} -KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump); - -int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) -{ - struct kbase_vinstr_context *vinstr_ctx; - int rcode; - u64 unused; - - if (!cli) - return -EINVAL; - - vinstr_ctx = cli->vinstr_ctx; - KBASE_DEBUG_ASSERT(vinstr_ctx); - - mutex_lock(&vinstr_ctx->lock); - - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); - if (rcode) - goto exit; - rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx); - if (rcode) - goto exit; - memset(cli->accum_buffer, 0, cli->dump_size); - - kbasep_vinstr_reprogram(vinstr_ctx); - -exit: - mutex_unlock(&vinstr_ctx->lock); - - return rcode; -} - -int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - int ret = -EAGAIN; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - vinstr_ctx->suspend_cnt++; - /* overflow shall not happen */ - BUG_ON(0 == vinstr_ctx->suspend_cnt); - ret = 0; - break; - - case VINSTR_IDLE: - if (vinstr_ctx->clients_present) { - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); - } else { - vinstr_ctx->state = VINSTR_SUSPENDED; - - vinstr_ctx->suspend_cnt++; - /* overflow shall not happen */ - WARN_ON(0 == vinstr_ctx->suspend_cnt); - ret = 0; - } - break; - - case VINSTR_DUMPING: - vinstr_ctx->state = VINSTR_SUSPENDING; - break; - - case VINSTR_RESUMING: - vinstr_ctx->need_suspend = true; - break; - - case VINSTR_SUSPENDING: - break; - - default: - KBASE_DEBUG_ASSERT(0); - break; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - return ret; -} - -static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - int ret = -EAGAIN; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - case VINSTR_RESUMING: - case VINSTR_SUSPENDING: - break; - - case VINSTR_IDLE: - case VINSTR_DUMPING: - ret = 0; - break; - default: - KBASE_DEBUG_ASSERT(0); - break; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - return ret; -} - -void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - wait_event(vinstr_ctx->suspend_waitq, - (0 == kbase_vinstr_try_suspend(vinstr_ctx))); -} - -void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx) -{ - wait_event(vinstr_ctx->suspend_waitq, - (0 == kbase_vinstr_is_ready(vinstr_ctx))); -} -KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready); - -/** - * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending - * on nclients - * @vinstr_ctx: vinstr context pointer - * - * This function should be called whenever vinstr_ctx->nclients changes. This - * may cause vinstr to be suspended or resumed, depending on the number of - * clients and whether IPA is suspended or not. 
- */ -static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - lockdep_assert_held(&vinstr_ctx->state_lock); - - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) { - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - } - break; - - case VINSTR_SUSPENDING: - if (vinstr_ctx->nclients) - vinstr_ctx->need_resume = true; - break; - - case VINSTR_IDLE: - if (!vinstr_ctx->nclients) { - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); - } - break; - - case VINSTR_DUMPING: - if (!vinstr_ctx->nclients) - vinstr_ctx->state = VINSTR_SUSPENDING; - break; - - case VINSTR_RESUMING: - if (!vinstr_ctx->nclients) - vinstr_ctx->need_suspend = true; - break; - } -} - -void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); - if (VINSTR_SUSPENDED == vinstr_ctx->state) { - BUG_ON(0 == vinstr_ctx->suspend_cnt); - vinstr_ctx->suspend_cnt--; - if (0 == vinstr_ctx->suspend_cnt) { - if (vinstr_ctx->clients_present) { - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - } else { - vinstr_ctx->state = VINSTR_IDLE; - } - } - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -} - -void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client) -{ - struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx; - unsigned long flags; - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - if (!client->suspended) { - list_del(&client->list); - list_add(&client->list, &vinstr_ctx->suspended_clients); - - vinstr_ctx->nclients--; - vinstr_ctx->nclients_suspended++; - kbase_vinstr_update_suspend(vinstr_ctx); - - client->suspended = true; - } - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -} - -void kbase_vinstr_resume_client(struct kbase_vinstr_client *client) -{ - struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx; - unsigned long flags; - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - - if (client->suspended) { - list_del(&client->list); - list_add(&client->list, &vinstr_ctx->idle_clients); - - vinstr_ctx->nclients++; - vinstr_ctx->nclients_suspended--; - kbase_vinstr_update_suspend(vinstr_ctx); - - client->suspended = false; - } - - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); -} diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index d32799f74084..81d315f95567 100755 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -20,163 +20,72 @@ * */ +/* + * Vinstr, used to provide an ioctl for userspace access to periodic hardware + * counters. 
+ */ + #ifndef _KBASE_VINSTR_H_ #define _KBASE_VINSTR_H_ -#include -#include - -/*****************************************************************************/ - struct kbase_vinstr_context; -struct kbase_vinstr_client; - -/*****************************************************************************/ - -/** - * kbase_vinstr_init() - initialize the vinstr core - * @kbdev: kbase device - * - * Return: pointer to the vinstr context on success or NULL on failure - */ -struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev); - -/** - * kbase_vinstr_term() - terminate the vinstr core - * @vinstr_ctx: vinstr context - */ -void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader - * @vinstr_ctx: vinstr context - * @setup: reader's configuration - * - * Return: file descriptor on success and a (negative) error code otherwise - */ -int kbase_vinstr_hwcnt_reader_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup); +struct kbase_hwcnt_virtualizer; +struct kbase_ioctl_hwcnt_reader_setup; /** - * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping - * @vinstr_ctx: vinstr context - * @cli: pointer where to store pointer to new vinstr client structure - * @enable: hwc configuration + * kbase_vinstr_init() - Initialise a vinstr context. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr + * context will be stored on success. * - * Return: zero on success - */ -int kbase_vinstr_legacy_hwc_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_vinstr_client **cli, - struct kbase_ioctl_hwcnt_enable *enable); - -/** - * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side - * client - * @vinstr_ctx: vinstr context - * @setup: reader's configuration - * @kernel_buffer: pointer to dump buffer + * On creation, the suspend count of the context will be 0. * - * setup->buffer_count is not used for kernel side clients. - * - * Return: pointer to client structure, or NULL on failure + * Return: 0 on success, else error code. */ -struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup, - void *kernel_buffer); +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx); /** - * kbase_vinstr_hwc_dump - issue counter dump for vinstr client - * @cli: pointer to vinstr client - * @event_id: id of event that triggered hwcnt dump - * - * Return: zero on success + * kbase_vinstr_term() - Terminate a vinstr context. + * @vctx: Pointer to the vinstr context to be terminated. */ -int kbase_vinstr_hwc_dump( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id); +void kbase_vinstr_term(struct kbase_vinstr_context *vctx); /** - * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for - * a given kbase context - * @cli: pointer to vinstr client + * kbase_vinstr_suspend() - Increment the suspend count of the context. + * @vctx: Non-NULL pointer to the vinstr context to be suspended. * - * Return: zero on success + * After this function call returns, it is guaranteed that all timers and + * workers in vinstr will be cancelled, and will not be re-triggered until + * after the context has been resumed. 
In effect, this means no new counter + * dumps will occur for any existing or subsequently added periodic clients. */ -int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); /** - * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Return: 0 on success, or negative if state change is in progress + * kbase_vinstr_resume() - Decrement the suspend count of the context. + * @vctx: Non-NULL pointer to the vinstr context to be resumed. * - * Warning: This API call is non-generic. It is meant to be used only by - * job scheduler state machine. + * If a call to this function decrements the suspend count from 1 to 0, then + * normal operation of vinstr will be resumed (i.e. counter dumps will once + * again be automatically triggered for all periodic clients). * - * Function initiates vinstr switch to suspended state. Once it was called - * vinstr enters suspending state. If function return non-zero value, it - * indicates that state switch is not complete and function must be called - * again. On state switch vinstr will trigger job scheduler state machine - * cycle. - */ -int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_suspend - suspends operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Function initiates vinstr switch to suspended state. Then it blocks until - * operation is completed. - */ -void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready - * @vinstr_ctx: vinstr context - * - * Function waits for the vinstr to become ready for dumping. It can be in the - * resuming state after the client was attached but the client currently expects - * that vinstr is ready for dumping immediately post attach. - */ -void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_resume - resumes operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Function can be called only if it was preceded by a successful call + * It is only valid to call this function one time for each prior returned call * to kbase_vinstr_suspend. */ -void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); /** - * kbase_vinstr_dump_size - Return required size of dump buffer - * @kbdev: device pointer + * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader + * client. + * @vinstr_ctx: Non-NULL pointer to the vinstr context. + * @setup: Non-NULL pointer to the hwcnt reader configuration. * - * Return : buffer size in bytes + * Return: file descriptor on success, else a (negative) error code. 
*/ -size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); - -/** - * kbase_vinstr_detach_client - Detach a client from the vinstr core - * @cli: pointer to vinstr client - */ -void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); - -/** - * kbase_vinstr_suspend_client - Suspend vinstr client - * @client: pointer to vinstr client - */ -void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client); - -/** - * kbase_vinstr_resume_client - Resume vinstr client - * @client: pointer to vinstr client - */ -void kbase_vinstr_resume_client(struct kbase_vinstr_client *client); +int kbase_vinstr_hwcnt_reader_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_ioctl_hwcnt_reader_setup *setup); #endif /* _KBASE_VINSTR_H_ */ - diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h index da2ffaffccc7..6c6a8c6a5b43 100755 --- a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h +++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,7 +154,6 @@ DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED); DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE); DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET); DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE); -DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX); DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); @@ -176,11 +175,10 @@ DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); -DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED); DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE); -DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h index f17bd5edf7e1..3a4db10bdb3d 100755 --- a/drivers/gpu/arm/midgard/mali_malisw.h +++ b/drivers/gpu/arm/midgard/mali_malisw.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -82,15 +82,6 @@ */ #define CSTD_NOP(...) ((void)#__VA_ARGS__) -/** - * Function-like macro for converting a pointer in to a u64 for storing into - * an external data structure. This is commonly used when pairing a 32-bit - * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion - * is complex and a straight cast does not work reliably as pointers are - * often considered as signed. 
- */ -#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x))) - /** * @hideinitializer * Function-like macro for stringizing a single level macro. @@ -115,22 +106,4 @@ */ #define CSTD_STR2(x) CSTD_STR1(x) -/** - * Specify an assertion value which is evaluated at compile time. Recommended - * usage is specification of a @c static @c INLINE function containing all of - * the assertions thus: - * - * @code - * static INLINE [module]_compile_time_assertions( void ) - * { - * COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) ); - * } - * @endcode - * - * @note Use @c static not @c STATIC. We never want to turn off this @c static - * specification for testing purposes. - */ -#define CSTD_COMPILE_TIME_ASSERT(expr) \ - do { switch (0) { case 0: case (expr):; } } while (false) - #endif /* _MALISW_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index 180850069f2e..0f03e8d216ab 100755 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -45,6 +45,7 @@ #define GPU_IRQ_MASK 0x028 /* (RW) */ #define GPU_IRQ_STATUS 0x02C /* (RO) */ + /* IRQ flags */ #define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ #define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ @@ -215,6 +216,9 @@ #define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ #define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ +/* JOB IRQ flags */ +#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ + #define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ #define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ #define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ @@ -377,14 +381,14 @@ /* * Begin TRANSCFG register values */ -#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24) -#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24) -#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24) - -#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28)) -#define AS_TRANSCFG_PTW_SH_OS (2 << 28) -#define AS_TRANSCFG_PTW_SH_IS (3 << 28) - +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) /* * Begin Command Values */ @@ -497,7 +501,7 @@ #define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ #define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ -/* AS_MEMATTR values: */ +/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ /* Use GPU implementation-defined caching policy. */ #define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull /* The attribute set to force all resources to be cached. 
*/ @@ -509,6 +513,12 @@ #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull /* Set to write back memory, outer caching */ #define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull /* Use GPU implementation-defined caching policy. */ #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull @@ -520,6 +530,11 @@ #define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull /* Set to write back memory, outer caching */ #define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +/* There is no LPAE support for non-cacheable, since the memory type is always + * write-back. + * Marking this setting as reserved for LPAE + */ +#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED /* Symbols for default MEMATTR to use * Default is - HW implementation defined caching */ @@ -536,6 +551,8 @@ #define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 /* Outer coherent, write alloc inner */ #define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 /* JS_FEATURES register */ diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h deleted file mode 100755 index d0deeadf479f..000000000000 --- a/drivers/gpu/arm/midgard/mali_timeline.h +++ /dev/null @@ -1,401 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mali_timeline - -#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _MALI_TIMELINE_H - -#include - -TRACE_EVENT(mali_timeline_atoms_in_flight, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int tgid, - int count), - - TP_ARGS(ts_sec, - ts_nsec, - tgid, - count), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, tgid) - __field(int, count) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->tgid = tgid; - __entry->count = count; - ), - - TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->tgid, - __entry->count) -); - - -TRACE_EVENT(mali_timeline_atom, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int event_type, - int tgid, - int atom_id), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - tgid, - atom_id), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, tgid) - __field(int, atom_id) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->tgid = tgid; - __entry->atom_id = atom_id; - ), - - TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->tgid, - __entry->atom_id, - __entry->atom_id) -); - -TRACE_EVENT(mali_timeline_gpu_slot_active, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int event_type, - int tgid, - int js, - int count), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - tgid, - js, - count), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, tgid) - __field(int, js) - __field(int, count) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->tgid = tgid; - __entry->js = js; - __entry->count = count; - ), - - TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->tgid, - __entry->js, - __entry->count) -); - -TRACE_EVENT(mali_timeline_gpu_slot_action, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int event_type, - int tgid, - int js, - int count), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - tgid, - js, - count), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, tgid) - __field(int, js) - __field(int, count) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->tgid = tgid; - __entry->js = js; - __entry->count = count; - ), - - TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->tgid, - __entry->js, - __entry->count) -); - -TRACE_EVENT(mali_timeline_gpu_power_active, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int event_type, - int active), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - active), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, active) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->active = active; - ), - - TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->active) - -); - -TRACE_EVENT(mali_timeline_l2_power_active, - - TP_PROTO(u64 ts_sec, - u32 
ts_nsec, - int event_type, - int state), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - state), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, state) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->state = state; - ), - - TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->state) - -); -TRACE_EVENT(mali_timeline_pm_event, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int event_type, - int pm_event_type, - unsigned int pm_event_id), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - pm_event_type, - pm_event_id), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, pm_event_type) - __field(unsigned int, pm_event_id) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->pm_event_type = pm_event_type; - __entry->pm_event_id = pm_event_id; - ), - - TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->pm_event_type, __entry->pm_event_id) - -); - -TRACE_EVENT(mali_timeline_slot_atom, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int event_type, - int tgid, - int js, - int atom_id), - - TP_ARGS(ts_sec, - ts_nsec, - event_type, - tgid, - js, - atom_id), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, event_type) - __field(int, tgid) - __field(int, js) - __field(int, atom_id) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->event_type = event_type; - __entry->tgid = tgid; - __entry->js = js; - __entry->atom_id = atom_id; - ), - - TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->tgid, - __entry->js, - __entry->atom_id) -); - -TRACE_EVENT(mali_timeline_pm_checktrans, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int trans_code, - int trans_id), - - TP_ARGS(ts_sec, - ts_nsec, - trans_code, - trans_id), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, trans_code) - __field(int, trans_id) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->trans_code = trans_code; - __entry->trans_id = trans_id; - ), - - TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->trans_id) - -); - -TRACE_EVENT(mali_timeline_context_active, - - TP_PROTO(u64 ts_sec, - u32 ts_nsec, - int count), - - TP_ARGS(ts_sec, - ts_nsec, - count), - - TP_STRUCT__entry( - __field(u64, ts_sec) - __field(u32, ts_nsec) - __field(int, count) - ), - - TP_fast_assign( - __entry->ts_sec = ts_sec; - __entry->ts_nsec = ts_nsec; - __entry->count = count; - ), - - TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE, - (int)__entry->ts_sec, - (int)__entry->ts_nsec, - __entry->count) -); - -#endif /* _MALI_TIMELINE_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . - -/* This part must be outside protection */ -#include - diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h index 961a4a5c63eb..701f3909042f 100755 --- a/drivers/gpu/arm/midgard/mali_uk.h +++ b/drivers/gpu/arm/midgard/mali_uk.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -44,7 +44,7 @@ extern "C" { * @defgroup uk_api User-Kernel Interface API * * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device - * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver. + * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. * * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent * kernel-side API (UKK) via an OS-specific communication mechanism. @@ -74,68 +74,6 @@ enum uk_client_id { UK_CLIENT_COUNT }; -/** - * Each function callable through the UK interface has a unique number. - * Functions provided by UK clients start from number UK_FUNC_ID. - * Numbers below UK_FUNC_ID are used for internal UK functions. - */ -enum uk_func { - UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */ - /** - * Each UK client numbers the functions they provide starting from - * number UK_FUNC_ID. This number is then eventually assigned to the - * id field of the union uk_header structure when preparing to make a - * UK call. See your UK client for a list of their function numbers. - */ - UK_FUNC_ID = 512 -}; - -/** - * Arguments for a UK call are stored in a structure. This structure consists - * of a fixed size header and a payload. The header carries a 32-bit number - * identifying the UK function to be called (see uk_func). When the UKK client - * receives this header and executed the requested UK function, it will use - * the same header to store the result of the function in the form of a - * int return code. The size of this structure is such that the - * first member of the payload following the header can be accessed efficiently - * on a 32 and 64-bit kernel and the structure has the same size regardless - * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined - * accordingly in the OS specific mali_uk_os.h header file. - */ -union uk_header { - /** - * 32-bit number identifying the UK function to be called. - * Also see uk_func. - */ - u32 id; - /** - * The int return code returned by the called UK function. - * See the specification of the particular UK function you are - * calling for the meaning of the error codes returned. All - * UK functions return 0 on success. - */ - u32 ret; - /* - * Used to ensure 64-bit alignment of this union. Do not remove. - * This field is used for padding and does not need to be initialized. - */ - u64 sizer; -}; - -/** - * This structure carries a 16-bit major and minor number and is sent along with an internal UK call - * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side. - */ -struct uku_version_check_args { - union uk_header header; - /**< UK call header */ - u16 major; - /**< This field carries the user-side major version on input and the kernel-side major version on output */ - u16 minor; - /**< This field carries the user-side minor version on input and the kernel-side minor version on output. 
*/ - u8 padding[4]; -}; - /** @} end group uk_api */ /** @} *//* end group base_api */ diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c index d131a4bb0934..d3ff22c1de41 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c @@ -1,3 +1,20 @@ +/* + * mali_clock.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + #include #include #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h index 9b8b39287a3f..939d3b43b224 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h @@ -1,3 +1,20 @@ +/* + * mali_clock.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + #ifndef __MALI_CLOCK_H__ #define __MALI_CLOCK_H__ #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c index d7dd24641d0e..7ce3539826fe 100755 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c @@ -1,26 +1,20 @@ /* + * mali_kbase_config_devicetree.c * - * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * */ - #ifdef CONFIG_DEVFREQ_THERMAL #include #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h index 1c5601544ab2..233a18ebfaa2 100755 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h @@ -1,28 +1,20 @@ /* + * mali_kbase_config_platform.h * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * */ - - - /** * Maximum frequency GPU will be clocked at. Given in kHz. * This must be specified as there is no default value. diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c index dc1654f2a6df..7658626000c9 100755 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.c @@ -1,6 +1,20 @@ -/** - ** Meson hardware specific initialization - **/ +/* + * mali_kbase_meson.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + #include #include #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h index 89ff21bb559b..5e69f3327582 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_meson.h @@ -1,4 +1,19 @@ - +/* + * mali_kbase_meson.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ #include #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c index a6ef90fb8828..83c9c5b5930d 100755 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,22 +1,17 @@ /* + * mali_kbase_runtime_pm.c * - * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
* */ @@ -41,8 +36,8 @@ static int first = 1; #define RESET2_LEVEL 0x22 #define Rd(r) readl((reg_base_hiubus) + ((r)<<2)) #define Wr(r, v) writel((v), ((reg_base_hiubus) + ((r)<<2))) -#define Mali_WrReg(regnum, value) kbase_reg_write(kbdev, (regnum), (value), NULL) -#define Mali_RdReg(regnum) kbase_reg_read(kbdev, (regnum), NULL) +#define Mali_WrReg(regnum, value) kbase_reg_write(kbdev, (regnum), (value)) +#define Mali_RdReg(regnum) kbase_reg_read(kbdev, (regnum)) #define stimulus_print printk #define stimulus_display printk #define Mali_pwr_off(x) Mali_pwr_off_with_kdev(kbdev, (x)) @@ -192,8 +187,8 @@ static int pm_callback_power_on(struct kbase_device *kbdev) udelay(10); // OR POLL for reset done - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819, NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16)); Mali_pwr_on_with_kdev(kbdev, 0x1); //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n", diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h index 41185d024931..724112f9c5a3 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h @@ -1,8 +1,18 @@ /* * mali_platform.h * - * Created on: Nov 8, 2013 - * Author: amlogic + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * */ #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h index b541090ad442..aaab5dae8b6b 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h @@ -1,11 +1,18 @@ /* - * Copyright (C) 2013 ARM Limited. All rights reserved. + * mali_scaling.h * - * This program is free software and is provided to you under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * A copy of the licence is included with the program, and can also be obtained from Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ /** diff --git a/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c index 64de2d5eb823..09feaaf1cb70 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c @@ -1,8 +1,17 @@ /* - * Copyright (C) 2010, 2012-2014 Amlogic Limited. All rights reserved. + * meson_main2.c * - * This program is free software and is provided to you under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * */ diff --git a/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h index 65f8e147995d..a7b476933144 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h +++ b/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h @@ -1,8 +1,18 @@ /* - * mali_platform.h + * meson_main2.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * Created on: Nov 8, 2013 - * Author: amlogic */ #ifndef MESON_MAIN_H_ diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c index 831ae72e0d8a..6cbeb0cb2043 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c @@ -1,13 +1,20 @@ -/******************************************************************* +/* + * mpgpu.c * - * Copyright C 2013 by Amlogic, Inc. All Rights Reserved. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. * - * Description: + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * Author: Amlogic Software - * Created: 2010/4/1 19:46 + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
* - *******************************************************************/ + */ + /* Standard Linux headers */ #include #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c index 399730076202..ac49caa3b945 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c @@ -1,10 +1,18 @@ /* - * platform.c + * platform_gx.c * - * clock source setting and resource config + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * Created on: Dec 4, 2013 - * Author: amlogic */ #include diff --git a/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/drivers/gpu/arm/midgard/platform/devicetree/scaling.c index aefb3e4583c2..081a97b07a95 100644 --- a/drivers/gpu/arm/midgard/platform/devicetree/scaling.c +++ b/drivers/gpu/arm/midgard/platform/devicetree/scaling.c @@ -1,11 +1,18 @@ /* - * Copyright (C) 2013 ARM Limited. All rights reserved. + * scaling.c * - * This program is free software and is provided to you under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence. + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * A copy of the licence is included with the program, and can also be obtained from Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ /** @@ -36,7 +43,7 @@ static int lastStep; static struct work_struct wq_work; static mali_plat_info_t* pmali_plat = NULL; #endif -static int scaling_mode = MALI_SCALING_DISABLE; +static int scaling_mode = MALI_PP_FS_SCALING; extern int mali_pm_statue; //static int scaling_mode = MALI_SCALING_DISABLE; //static int scaling_mode = MALI_PP_SCALING; diff --git a/drivers/gpu/arm/midgard/sconscript b/drivers/gpu/arm/midgard/sconscript index 4c38f2a07045..f9d9c1bb243f 100755 --- a/drivers/gpu/arm/midgard/sconscript +++ b/drivers/gpu/arm/midgard/sconscript @@ -50,8 +50,8 @@ make_args = env.kernel_get_config_defines(ret_list = True) + [ 'MALI_KERNEL_TEST_API=%s' % env['debug'], 'MALI_UNIT_TEST=%s' % env['unit'], 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], - 'MALI_MOCK_TEST=%s' % mock_test, 'MALI_CUSTOMER_RELEASE=%s' % env['release'], + 'MALI_USE_CSF=%s' % env['csf'], 'MALI_COVERAGE=%s' % env['coverage'], ] diff --git a/drivers/gpu/arm/midgard/tests/Mconfig b/drivers/gpu/arm/midgard/tests/Mconfig index f692e3413ac2..af4e383badb3 100755 --- a/drivers/gpu/arm/midgard/tests/Mconfig +++ b/drivers/gpu/arm/midgard/tests/Mconfig @@ -20,3 +20,13 @@ config BUILD_IPA_TESTS bool default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ default n + +config BUILD_IPA_UNIT_TESTS + bool + default y if NO_MALI && BUILD_IPA_TESTS + default n + +config BUILD_CSF_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF + default n diff --git a/drivers/gpu/arm/midgard/tests/build.bp b/drivers/gpu/arm/midgard/tests/build.bp index 28a756b16dc1..a0823c7c0c64 100755 --- a/drivers/gpu/arm/midgard/tests/build.bp +++ b/drivers/gpu/arm/midgard/tests/build.bp @@ -19,18 +19,4 @@ bob_defaults { "kernel/drivers/gpu/arm/midgard/backend/gpu", "kernel/drivers/gpu/arm/midgard/tests/include", ], - extra_symbols: ["kutf"], } - -subdirs = [ - "kutf", - "mali_kutf_irq_test", -] - -optional_subdirs = [ - "kutf_test", - "kutf_test_runner", - "mali_kutf_ipa_test", - "mali_kutf_ipa_unit_test", - "mali_kutf_vinstr_test", -] diff --git a/drivers/gpu/arm/midgard/tests/kutf/build.bp b/drivers/gpu/arm/midgard/tests/kutf/build.bp index f6d4c3fc7e15..960c8faa8df9 100755 --- a/drivers/gpu/arm/midgard/tests/kutf/build.bp +++ b/drivers/gpu/arm/midgard/tests/kutf/build.bp @@ -25,7 +25,7 @@ bob_kernel_module { kbuild_options: ["CONFIG_MALI_KUTF=m"], include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"], enabled: false, - unit_test_kernel_modules: { + base_build_kutf: { enabled: true, }, } diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c index 1c350bb339fb..f3a8e9b23f4c 100755 --- a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c +++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -242,8 +242,6 @@ static void kutf_add_explicit_result(struct kutf_context *context) { switch (context->expected_status) { case KUTF_RESULT_UNKNOWN: - if (context->status == KUTF_RESULT_UNKNOWN) - kutf_test_pass(context, "(implicit pass)"); break; case KUTF_RESULT_WARN: diff --git a/drivers/gpu/arm/midgard/tests/kutf/sconscript b/drivers/gpu/arm/midgard/tests/kutf/sconscript index 98f64468dac9..4590d1af34db 100755 --- a/drivers/gpu/arm/midgard/tests/kutf/sconscript +++ b/drivers/gpu/arm/midgard/tests/kutf/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2014-2016, 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ Import('kutf_env') -make_args = kutf_env.kernel_get_config_defines(ret_list = True) +make_args = kutf_env.kernel_get_config_defines(ret_list = True, extra_cflags = ['-DCONFIG_MALI_KUTF'], extra_configs = ['CONFIG_MALI_KUTF=m']) mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args) kutf_env.KernelObjTarget('kutf', mod) diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile index 40df1179b86b..9218a40f8069 100755 --- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile +++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile @@ -31,6 +31,7 @@ endif TEST_CCFLAGS := \ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_USE_CSF=$(MALI_USE_CSF) \ $(SCONS_CFLAGS) \ -I$(CURDIR)/../include \ -I$(CURDIR)/../../../../../../include \ diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp index e1f77b0c5d7b..66f4eb3c4e90 100755 --- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp +++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp @@ -17,10 +17,12 @@ bob_kernel_module { "Kbuild", "mali_kutf_irq_test_main.c", ], - extra_symbols: ["mali_kbase"], - install_group: "IG_tests", + extra_symbols: [ + "mali_kbase", + "kutf", + ], enabled: false, - unit_test_kernel_modules: { + base_build_kutf: { enabled: true, kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"], }, diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c index 5013a9d7cf89..4181b7f92db6 100755 --- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2016, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -90,15 +90,14 @@ static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) struct kbase_device *kbdev = kbase_untag(data); u32 val; - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); if (val & TEST_IRQ) { struct timespec tval; getnstimeofday(&tval); irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, - NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); triggered = true; wake_up(&wait); @@ -194,7 +193,7 @@ static void mali_kutf_irq_latency(struct kutf_context *context) /* Trigger fake IRQ */ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - TEST_IRQ, NULL); + TEST_IRQ); ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT); diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript index 0ec5ce7e3632..cefac0be51cb 100755 --- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript +++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/sconscript @@ -29,7 +29,7 @@ if env.GetOption('clean') : cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, []) env.KernelObjTarget('mali_kutf_irq_test', cmd) else: - makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(), '$MAKECOMSTR') + makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} MALI_USE_CSF=${csf} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(extra_cflags = ['-DCONFIG_MALI_IRQ_LATENCY'], extra_configs = ['CONFIG_MALI_IRQ_LATENCY=m']), '$MAKECOMSTR') cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction]) env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko') env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko') diff --git a/drivers/gpu/arm/midgard/tests/sconscript b/drivers/gpu/arm/midgard/tests/sconscript index 0bd24a5e3f35..ca64e8360955 100755 --- a/drivers/gpu/arm/midgard/tests/sconscript +++ b/drivers/gpu/arm/midgard/tests/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2011, 2013, 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2011, 2013, 2017-2018 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,5 @@ if kutf_env['debug'] == '1': SConscript('kutf_test_runner/sconscript') if env['unit'] == '1': - SConscript('mali_kutf_ipa_test/sconscript') SConscript('mali_kutf_ipa_unit_test/sconscript') SConscript('mali_kutf_vinstr_test/sconscript') diff --git a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c index 6857eb761ee2..3aab51a173f0 100755 --- a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c +++ b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c @@ -49,12 +49,15 @@ * alignment, length and limits for the allocation * @is_shader_code: True if the allocation is for shader code (which has * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk * * Return: true if gap_end is now aligned correctly and is still in range, * false otherwise */ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, - struct vm_unmapped_area_info *info, bool is_shader_code) + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) { /* Compute highest gap address at the desired alignment */ (*gap_end) -= info->length; @@ -72,6 +75,35 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + info->length) & BASE_MEM_MASK_4GB)) return false; + } else if (is_same_4gb_page) { + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); + + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = + ALIGN(offset, info->align_mask + 1); + + start -= rounded_offset; + end -= rounded_offset; + + *gap_end = start; + + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + WARN_ON((start & mask) != ((end - 1) & mask)); + } } @@ -89,6 +121,8 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, * @is_shader_code: Boolean which denotes whether the allocated area is * intended for the use by shader core in which case a * special alignment requirements apply. + * @is_same_4gb_page: Boolean which indicates whether the allocated area needs + * to reside completely within a 4GB chunk. * * The unmapped_area_topdown() function in the Linux kernel is not exported * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a @@ -97,25 +131,26 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, * of this function and prefixed it with 'kbase_'. * * The difference in the call parameter list comes from the fact that - * kbase_unmapped_area_topdown() is called with additional parameter which - * is provided to denote whether the allocation is for a shader core memory - * or not. This is significant since the executable shader core memory has - * additional alignment requirements. 
+ * kbase_unmapped_area_topdown() is called with additional parameters which + * are provided to indicate whether the allocation is for a shader core memory, + * which has additional alignment requirements, and whether the allocation can + * straddle a 4GB boundary. * * The modification of the original Linux function lies in how the computation * of the highest gap address at the desired alignment is performed once the * gap with desirable properties is found. For this purpose a special function * is introduced (@ref align_and_check()) which beside computing the gap end - * at the desired alignment also performs additional alignment check for the - * case when the memory is executable shader core memory. For such case, it is - * ensured that the gap does not end on a 4GB boundary. + * at the desired alignment also performs additional alignment checks for the + * case when the memory is executable shader core memory, for which it is + * ensured that the gap does not end on a 4GB boundary, and for the case when + * memory needs to be confined within a 4GB chunk. * * Return: address of the found gap end (high limit) if area is found; * -ENOMEM if search is unsuccessful */ static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info - *info, bool is_shader_code) + *info, bool is_shader_code, bool is_same_4gb_page) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -142,7 +177,8 @@ static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info /* Check highest gap, which does not precede any rbtree node */ gap_start = mm->highest_vm_end; if (gap_start <= high_limit) { - if (align_and_check(&gap_end, gap_start, info, is_shader_code)) + if (align_and_check(&gap_end, gap_start, info, + is_shader_code, is_same_4gb_page)) return gap_end; } @@ -178,7 +214,7 @@ check_current: gap_end = info->high_limit; if (align_and_check(&gap_end, gap_start, info, - is_shader_code)) + is_shader_code, is_same_4gb_page)) return gap_end; } @@ -232,6 +268,7 @@ unsigned long kbase_get_unmapped_area(struct file *filp, int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; bool is_shader_code = false; + bool is_same_4gb_page = false; unsigned long ret; /* err on fixed address */ @@ -291,6 +328,8 @@ unsigned long kbase_get_unmapped_area(struct file *filp, align_mask = extent_bytes - 1; align_offset = extent_bytes - (reg->initial_commit << PAGE_SHIFT); + } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + is_same_4gb_page = true; } #ifndef CONFIG_64BIT } else { @@ -306,7 +345,8 @@ unsigned long kbase_get_unmapped_area(struct file *filp, info.align_offset = align_offset; info.align_mask = align_mask; - ret = kbase_unmapped_area_topdown(&info, is_shader_code); + ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && high_limit < (kctx->same_va_end << PAGE_SHIFT)) { @@ -315,7 +355,8 @@ unsigned long kbase_get_unmapped_area(struct file *filp, info.high_limit = min_t(u64, TASK_SIZE, (kctx->same_va_end << PAGE_SHIFT)); - ret = kbase_unmapped_area_topdown(&info, is_shader_code); + ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); } return ret; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 913e4c10fcd8..43cb33dc8333 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -300,9 +300,4 @@ config DRM_SAVAGE Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister chipset. 
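The is_same_4gb_page handling added above confines an allocation to a single 4GB chunk by shifting the candidate gap downwards whenever it would straddle a boundary. A condensed, user-space C sketch of that check is given below; the helper name confine_to_4gb and its standalone form are illustrative only, while the in-driver logic lives in align_and_check() exactly as shown in the hunk above.

#include <stdbool.h>
#include <stdint.h>

/*
 * Minimal sketch of the 4GB-straddle handling added to align_and_check():
 * if [start, start + length) crosses a 4GB boundary, the window is moved
 * down by the overshoot past the boundary, rounded up to the requested
 * alignment so the alignment itself is not disturbed. Assumes a top-down
 * search, i.e. start is large enough that the subtraction cannot wrap.
 */
bool confine_to_4gb(uint64_t *start, uint64_t length, uint64_t align_mask)
{
	uint64_t end = *start + length;
	uint64_t mask = ~(uint64_t)UINT32_MAX;	/* selects the 4GB "page" */

	if ((*start & mask) != ((end - 1) & mask)) {
		uint64_t offset = end - (end & mask);
		/* same rounding as ALIGN(offset, align_mask + 1) */
		uint64_t rounded = (offset + align_mask) & ~align_mask;

		*start -= rounded;
		end -= rounded;
	}

	/* Holds as long as length <= 4GB and start was already aligned */
	return (*start & mask) == ((end - 1) & mask);
}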
If M is selected the module will be called savage. -config DRM_BIFROST - bool - depends on HAS_IOMEM && DRM_LEGACY && DRM - help - endif # DRM_LEGACY diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 002865b708b6..a9005e707ee3 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -87,5 +87,3 @@ obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/ obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/ obj-$(CONFIG_DRM_ARCPGU)+= arc/ obj-y += hisilicon/ - -obj-$(CONFIG_DRM_BIFROST) += bifrost/ diff --git a/drivers/gpu/drm/bifrost/Kbuild b/drivers/gpu/drm/bifrost/Kbuild deleted file mode 100755 index 1a6fa3c9a3f6..000000000000 --- a/drivers/gpu/drm/bifrost/Kbuild +++ /dev/null @@ -1,23 +0,0 @@ -# -# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -obj-$(CONFIG_MALI_MIDGARD) += midgard/ diff --git a/drivers/gpu/drm/bifrost/Kconfig b/drivers/gpu/drm/bifrost/Kconfig deleted file mode 100644 index 693b86f8a144..000000000000 --- a/drivers/gpu/drm/bifrost/Kconfig +++ /dev/null @@ -1,25 +0,0 @@ -# -# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -menu "ARM GPU Configuration" -source "drivers/gpu/arm/midgard/Kconfig" -endmenu diff --git a/drivers/gpu/drm/bifrost/midgard/Kbuild b/drivers/gpu/drm/bifrost/midgard/Kbuild deleted file mode 100755 index 89ba6ce66393..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/Kbuild +++ /dev/null @@ -1,169 +0,0 @@ -# -# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -# Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r16p0-01rel0" - -# Paths required for build -KBASE_PATH = $(src) -KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy -UMP_PATH = $(src)/../../../base - -# Set up defaults if not defined by build system -MALI_CUSTOMER_RELEASE ?= 1 -MALI_USE_CSF ?= 0 -MALI_UNIT_TEST ?= 0 -MALI_KERNEL_TEST_API ?= 0 -MALI_COVERAGE ?= 0 -CONFIG_MALI_PLATFORM_NAME ?= "devicetree" - -# Set up our defines, which will be passed to gcc -DEFINES = \ - -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ - -DMALI_USE_CSF=$(MALI_USE_CSF) \ - -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ - -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ - -DMALI_COVERAGE=$(MALI_COVERAGE) \ - -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" - -ifeq ($(KBUILD_EXTMOD),) -# in-tree -DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) -else -# out-of-tree -DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) -endif - -DEFINES += -I$(srctree)/drivers/staging/android -ldflags-y += --strip-debug - -DEFINES += -DMALI_KBASE_BUILD - -# Use our defines when compiling -ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux -subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux - -SRC := \ - mali_kbase_device.c \ - mali_kbase_cache_policy.c \ - mali_kbase_mem.c \ - mali_kbase_mmu.c \ - mali_kbase_ctx_sched.c \ - mali_kbase_jd.c \ - mali_kbase_jd_debugfs.c \ - mali_kbase_jm.c \ - mali_kbase_gpuprops.c \ - mali_kbase_js.c \ - mali_kbase_js_ctx_attr.c \ - mali_kbase_event.c \ - mali_kbase_context.c \ - mali_kbase_pm.c \ - mali_kbase_config.c \ - mali_kbase_vinstr.c \ - mali_kbase_hwcnt.c \ - mali_kbase_hwcnt_backend_gpu.c \ - mali_kbase_hwcnt_gpu.c \ - mali_kbase_hwcnt_legacy.c \ - mali_kbase_hwcnt_types.c \ - mali_kbase_hwcnt_virtualizer.c \ - mali_kbase_softjobs.c \ - mali_kbase_10969_workaround.c \ - mali_kbase_hw.c \ - mali_kbase_debug.c \ - mali_kbase_gpu_memory_debugfs.c \ - mali_kbase_mem_linux.c \ - mali_kbase_core_linux.c \ - mali_kbase_replay.c \ - mali_kbase_mem_profile_debugfs.c \ - mali_kbase_mmu_mode_lpae.c \ - mali_kbase_mmu_mode_aarch64.c \ - mali_kbase_disjoint_events.c \ - mali_kbase_gator_api.c \ - mali_kbase_debug_mem_view.c \ - mali_kbase_debug_job_fault.c \ - mali_kbase_smc.c \ - mali_kbase_mem_pool.c \ - mali_kbase_mem_pool_debugfs.c \ - mali_kbase_tlstream.c \ - mali_kbase_strings.c \ - mali_kbase_as_fault_debugfs.c \ - mali_kbase_regs_history_debugfs.c \ - thirdparty/mali_kbase_mmap.c - - -ifeq ($(CONFIG_MALI_CINSTR_GWT),y) - SRC += mali_kbase_gwt.c -endif - -ifeq ($(MALI_UNIT_TEST),1) - SRC += mali_kbase_tlstream_test.c -endif - -ifeq ($(MALI_CUSTOMER_RELEASE),0) - SRC += mali_kbase_regs_dump_debugfs.c -endif - - -ccflags-y += -I$(KBASE_PATH) - -# Tell the Linux build system from which .o file to create the kernel module -obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o - -# Tell the Linux build system to enable building of our .c files -mali_kbase-y := $(SRC:.c=.o) - -# Kconfig passes in the name with quotes for in-tree builds - remove them. 
-platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) -MALI_PLATFORM_DIR := platform/$(platform_name) -ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR) -include $(src)/$(MALI_PLATFORM_DIR)/Kbuild - -ifeq ($(CONFIG_MALI_DEVFREQ),y) - ifeq ($(CONFIG_DEVFREQ_THERMAL),y) - include $(src)/ipa/Kbuild - endif -endif - -ifeq ($(MALI_USE_CSF),1) - include $(src)/csf/Kbuild -endif - -mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ - mali_kbase_dma_fence.o \ - mali_kbase_fence.o -mali_kbase-$(CONFIG_SYNC) += \ - mali_kbase_sync_android.o \ - mali_kbase_sync_common.o -mali_kbase-$(CONFIG_SYNC_FILE) += \ - mali_kbase_sync_file.o \ - mali_kbase_sync_common.o \ - mali_kbase_fence.o - -include $(src)/backend/gpu/Kbuild -mali_kbase-y += $(BACKEND:.c=.o) - - -ccflags-y += -I$(src)/backend/gpu -subdir-ccflags-y += -I$(src)/backend/gpu - -# For kutf and mali_kutf_irq_latency_test -obj-$(CONFIG_MALI_KUTF) += tests/ diff --git a/drivers/gpu/drm/bifrost/midgard/Kconfig b/drivers/gpu/drm/bifrost/midgard/Kconfig deleted file mode 100644 index 7c100165e1ee..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/Kconfig +++ /dev/null @@ -1,211 +0,0 @@ -# -# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -menuconfig MALI_MIDGARD - tristate "Mali Midgard series support" - select GPU_TRACEPOINTS if ANDROID - default n - help - Enable this option to build support for a ARM Mali Midgard GPU. - - To compile this driver as a module, choose M here: - this will generate a single module, called mali_kbase. - -config MALI_GATOR_SUPPORT - bool "Enable Streamline tracing support" - depends on MALI_MIDGARD - default y - help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. - -config MALI_MIDGARD_DVFS - bool "Enable legacy DVFS" - depends on MALI_MIDGARD && !MALI_DEVFREQ - default n - help - Choose this option to enable legacy DVFS in the Mali Midgard DDK. - -config MALI_MIDGARD_ENABLE_TRACE - bool "Enable kbase tracing" - depends on MALI_MIDGARD - default n - help - Enables tracing in kbase. Trace log available through - the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled - -config MALI_DEVFREQ - bool "devfreq support for Mali" - depends on MALI_MIDGARD && PM_DEVFREQ - default y - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simpleondemand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. - -config MALI_DMA_FENCE - bool "DMA_BUF fence support for Mali" - depends on MALI_MIDGARD - default n - help - Support DMA_BUF fences for Mali. - - This option should only be enabled if the Linux Kernel has built in - support for DMA_BUF fences. 
- -config MALI_PLATFORM_NAME - depends on MALI_MIDGARD - string "Platform name" - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - -# MALI_EXPERT configuration options - -menuconfig MALI_EXPERT - depends on MALI_MIDGARD - bool "Enable Expert Settings" - default n - help - Enabling this option and modifying the default settings may produce a driver with performance or - other limitations. - -config MALI_CORESTACK - bool "Support controlling power to the GPU core stack" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Enabling this feature on supported GPUs will let the driver powering - on/off the GPU core stack independently without involving the Power - Domain Controller. This should only be enabled on platforms which - integration of the PDC to the Mali GPU is known to be problematic. - This feature is currently only supported on t-Six and t-HEx GPUs. - - If unsure, say N. - -config MALI_DEBUG - bool "Debug build" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Select this option for increased checking and reporting of errors. - -config MALI_FENCE_DEBUG - bool "Debug sync fence usage" - depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) - default y if MALI_DEBUG - help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. - - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. - -config MALI_NO_MALI - bool "No Mali" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - This can be used to test the driver in a simulated environment - whereby the hardware is not physically present. If the hardware is physically - present it will not be used. This can be used to test the majority of the - driver without needing actual hardware or for software benchmarking. - All calls to the simulated hardware will complete immediately as if the hardware - completed the task. - -config MALI_ERROR_INJECT - bool "Error injection" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI - default n - help - Enables insertion of errors to test module failure and recovery mechanisms. - -config MALI_SYSTEM_TRACE - bool "Enable system event tracing support" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Choose this option to enable system trace events for each - kbase event. This is typically used for debugging but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -config MALI_2MB_ALLOC - bool "Attempt to allocate 2MB pages" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Rather than allocating all GPU memory page-by-page, attempt to - allocate 2MB pages from the kernel. This reduces TLB pressure and - helps to prevent memory fragmentation. - - If in doubt, say N - -config MALI_PWRSOFT_765 - bool "PWRSOFT-765 ticket" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged - in kernel v4.10, however if backported into the kernel then this - option must be manually selected. 
- - If using kernel >= v4.10 then say N, otherwise if devfreq cooling - changes have been backported say Y to avoid compilation errors. - -# Instrumentation options. - -config MALI_JOB_DUMP - bool "Enable system level support needed for job dumping" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Choose this option to enable system level support needed for - job dumping. This is typically used for instrumentation but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -config MALI_PRFCNT_SET_SECONDARY - bool "Use secondary set of performance counters" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Select this option to use secondary set of performance counters. Kernel - features that depend on an access to the primary set of counters may - become unavailable. Enabling this option will prevent power management - from working optimally and may cause instrumentation tools to return - bogus results. - - If unsure, say N. - -source "drivers/gpu/arm/midgard/platform/Kconfig" -source "drivers/gpu/arm/midgard/tests/Kconfig" diff --git a/drivers/gpu/drm/bifrost/midgard/Makefile b/drivers/gpu/drm/bifrost/midgard/Makefile deleted file mode 100644 index 08b2fa9f202c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -# -# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -KDIR ?= /lib/modules/$(shell uname -r)/build - -BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. -KBASE_PATH_RELATIVE = $(CURDIR) - -ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y) -#Add bus logger symbols -EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers -endif - -# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions -all: - $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules - -clean: - $(MAKE) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/drm/bifrost/midgard/Makefile.kbase b/drivers/gpu/drm/bifrost/midgard/Makefile.kbase deleted file mode 100755 index 6b0f81ee76e8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/Makefile.kbase +++ /dev/null @@ -1,23 +0,0 @@ -# -# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM) - diff --git a/drivers/gpu/drm/bifrost/midgard/Mconfig b/drivers/gpu/drm/bifrost/midgard/Mconfig deleted file mode 100755 index 46dca1498fe8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/Mconfig +++ /dev/null @@ -1,190 +0,0 @@ -# -# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. -# -# - - -menuconfig MALI_MIDGARD - bool "Mali Midgard series support" - default y - help - Enable this option to build support for a ARM Mali Midgard GPU. - - To compile this driver as a module, choose M here: - this will generate a single module, called mali_kbase. - -config MALI_GATOR_SUPPORT - bool "Enable Streamline tracing support" - depends on MALI_MIDGARD && !BACKEND_USER - default y - help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. - -config MALI_MIDGARD_DVFS - bool "Enable legacy DVFS" - depends on MALI_MIDGARD && !MALI_DEVFREQ - default n - help - Choose this option to enable legacy DVFS in the Mali Midgard DDK. - -config MALI_MIDGARD_ENABLE_TRACE - bool "Enable kbase tracing" - depends on MALI_MIDGARD - default n - help - Enables tracing in kbase. Trace log available through - the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled - -config MALI_DEVFREQ - bool "devfreq support for Mali" - depends on MALI_MIDGARD - default y if PLATFORM_JUNO - default y if PLATFORM_CUSTOM - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simpleondemand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. - -config MALI_DMA_FENCE - bool "DMA_BUF fence support for Mali" - depends on MALI_MIDGARD - default n - help - Support DMA_BUF fences for Mali. - - This option should only be enabled if the Linux Kernel has built in - support for DMA_BUF fences. - -config MALI_PLATFORM_NAME - depends on MALI_MIDGARD - string "Platform name" - default "arndale" if PLATFORM_ARNDALE - default "arndale_octa" if PLATFORM_ARNDALE_OCTA - default "rk" if PLATFORM_FIREFLY - default "hisilicon" if PLATFORM_HIKEY960 - default "hisilicon" if PLATFORM_HIKEY970 - default "vexpress" if PLATFORM_VEXPRESS - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - - When PLATFORM_CUSTOM is set, this needs to be set manually to - pick up the desired platform files. - -# MALI_EXPERT configuration options - -menuconfig MALI_EXPERT - depends on MALI_MIDGARD - bool "Enable Expert Settings" - default y - help - Enabling this option and modifying the default settings may produce a driver with performance or - other limitations. 
- -config MALI_CORESTACK - bool "Support controlling power to the GPU core stack" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Enabling this feature on supported GPUs will let the driver powering - on/off the GPU core stack independently without involving the Power - Domain Controller. This should only be enabled on platforms which - integration of the PDC to the Mali GPU is known to be problematic. - This feature is currently only supported on t-Six and t-HEx GPUs. - - If unsure, say N. - -config MALI_DEBUG - bool "Debug build" - depends on MALI_MIDGARD && MALI_EXPERT - default y if DEBUG - default n - help - Select this option for increased checking and reporting of errors. - -config MALI_FENCE_DEBUG - bool "Debug sync fence usage" - depends on MALI_MIDGARD && MALI_EXPERT - default y if MALI_DEBUG - help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. - - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. - -config MALI_ERROR_INJECT - bool "Error injection" - depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI - default n - help - Enables insertion of errors to test module failure and recovery mechanisms. - -config MALI_ERROR_INJECT_RANDOM - bool "Random error injection" - depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI && MALI_ERROR_INJECT - default n - help - Injected errors are random, rather than user-driven. - -config MALI_SYSTEM_TRACE - bool "Enable system event tracing support" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Choose this option to enable system trace events for each - kbase event. This is typically used for debugging but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -config MALI_2MB_ALLOC - bool "Attempt to allocate 2MB pages" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Rather than allocating all GPU memory page-by-page, attempt to - allocate 2MB pages from the kernel. This reduces TLB pressure and - helps to prevent memory fragmentation. - - If in doubt, say N - -config MALI_FPGA_BUS_LOGGER - bool "Enable bus log integration" - depends on MALI_MIDGARD && MALI_EXPERT - default n - -config MALI_PWRSOFT_765 - bool "PWRSOFT-765 ticket" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - PWRSOFT-765 fixes devfreq cooling devices issues. However, they are - not merged in mainline kernel yet. So this define helps to guard those - parts of the code. - -# Instrumentation options. - -# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. -# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. - -source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/Kbuild b/drivers/gpu/drm/bifrost/midgard/backend/gpu/Kbuild deleted file mode 100755 index 2dc14559c6f9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/Kbuild +++ /dev/null @@ -1,60 +0,0 @@ -# -# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
-# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -BACKEND += \ - backend/gpu/mali_kbase_cache_policy_backend.c \ - backend/gpu/mali_kbase_device_hw.c \ - backend/gpu/mali_kbase_gpu.c \ - backend/gpu/mali_kbase_gpuprops_backend.c \ - backend/gpu/mali_kbase_debug_job_fault_backend.c \ - backend/gpu/mali_kbase_irq_linux.c \ - backend/gpu/mali_kbase_instr_backend.c \ - backend/gpu/mali_kbase_jm_as.c \ - backend/gpu/mali_kbase_jm_hw.c \ - backend/gpu/mali_kbase_jm_rb.c \ - backend/gpu/mali_kbase_js_backend.c \ - backend/gpu/mali_kbase_mmu_hw_direct.c \ - backend/gpu/mali_kbase_pm_backend.c \ - backend/gpu/mali_kbase_pm_driver.c \ - backend/gpu/mali_kbase_pm_metrics.c \ - backend/gpu/mali_kbase_pm_ca.c \ - backend/gpu/mali_kbase_pm_always_on.c \ - backend/gpu/mali_kbase_pm_coarse_demand.c \ - backend/gpu/mali_kbase_pm_policy.c \ - backend/gpu/mali_kbase_time.c - -ifeq ($(MALI_CUSTOMER_RELEASE),0) -BACKEND += \ - backend/gpu/mali_kbase_pm_always_on_demand.c -endif - -ifeq ($(CONFIG_MALI_DEVFREQ),y) -BACKEND += \ - backend/gpu/mali_kbase_devfreq.c -endif - -ifeq ($(CONFIG_MALI_NO_MALI),y) - # Dummy model - BACKEND += backend/gpu/mali_kbase_model_dummy.c - BACKEND += backend/gpu/mali_kbase_model_linux.c - # HW error simulation - BACKEND += backend/gpu/mali_kbase_model_error_generator.c -endif diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_backend_config.h deleted file mode 100644 index 4a61f96c8c7d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_backend_config.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Backend specific configuration - */ - -#ifndef _KBASE_BACKEND_CONFIG_H_ -#define _KBASE_BACKEND_CONFIG_H_ - -#endif /* _KBASE_BACKEND_CONFIG_H_ */ - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_cache_policy_backend.c deleted file mode 100644 index 7378bfd7b397..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "backend/gpu/mali_kbase_cache_policy_backend.h" -#include - -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode) -{ - kbdev->current_gpu_coherency_mode = mode; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); -} - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_cache_policy_backend.h deleted file mode 100644 index f78ada74f605..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ -#define _KBASE_CACHE_POLICY_BACKEND_H_ - -#include "mali_kbase.h" -#include "mali_base_kernel.h" - -/** - * kbase_cache_set_coherency_mode() - Sets the system coherency mode - * in the GPU. - * @kbdev: Device pointer - * @mode: Coherency mode. 
COHERENCY_ACE/ACE_LITE - */ -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode); - -#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c deleted file mode 100644 index 450f6e750a0c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include "mali_kbase_debug_job_fault.h" - -#ifdef CONFIG_DEBUG_FS - -/*GPU_CONTROL_REG(r)*/ -static int gpu_control_reg_snapshot[] = { - GPU_ID, - SHADER_READY_LO, - SHADER_READY_HI, - TILER_READY_LO, - TILER_READY_HI, - L2_READY_LO, - L2_READY_HI -}; - -/* JOB_CONTROL_REG(r) */ -static int job_control_reg_snapshot[] = { - JOB_IRQ_MASK, - JOB_IRQ_STATUS -}; - -/* JOB_SLOT_REG(n,r) */ -static int job_slot_reg_snapshot[] = { - JS_HEAD_LO, - JS_HEAD_HI, - JS_TAIL_LO, - JS_TAIL_HI, - JS_AFFINITY_LO, - JS_AFFINITY_HI, - JS_CONFIG, - JS_STATUS, - JS_HEAD_NEXT_LO, - JS_HEAD_NEXT_HI, - JS_AFFINITY_NEXT_LO, - JS_AFFINITY_NEXT_HI, - JS_CONFIG_NEXT -}; - -/*MMU_REG(r)*/ -static int mmu_reg_snapshot[] = { - MMU_IRQ_MASK, - MMU_IRQ_STATUS -}; - -/* MMU_AS_REG(n,r) */ -static int as_reg_snapshot[] = { - AS_TRANSTAB_LO, - AS_TRANSTAB_HI, - AS_MEMATTR_LO, - AS_MEMATTR_HI, - AS_FAULTSTATUS, - AS_FAULTADDRESS_LO, - AS_FAULTADDRESS_HI, - AS_STATUS -}; - -bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, - int reg_range) -{ - int i, j; - int offset = 0; - int slot_number; - int as_number; - - if (kctx->reg_dump == NULL) - return false; - - slot_number = kctx->kbdev->gpu_props.num_job_slots; - as_number = kctx->kbdev->gpu_props.num_address_spaces; - - /* get the GPU control registers*/ - for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); - offset += 2; - } - - /* get the Job control registers*/ - for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - JOB_CONTROL_REG(job_control_reg_snapshot[i]); - offset += 2; - } - - /* get the Job Slot registers*/ - for (j = 0; j < slot_number; j++) { - for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); - offset += 2; - } - } - - /* get the MMU registers*/ - for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); - offset += 2; - } - - /* get the Address space registers*/ - for (j = 0; j < as_number; j++) { - for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - 
MMU_AS_REG(j, as_reg_snapshot[i]); - offset += 2; - } - } - - WARN_ON(offset >= (reg_range*2/4)); - - /* set the termination flag*/ - kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; - kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; - - dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", - offset); - - return true; -} - -bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) -{ - int offset = 0; - - if (kctx->reg_dump == NULL) - return false; - - while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { - kctx->reg_dump[offset+1] = - kbase_reg_read(kctx->kbdev, - kctx->reg_dump[offset]); - offset += 2; - } - return true; -} - - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_devfreq.c deleted file mode 100755 index 5ade0122b5bb..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_devfreq.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include - -#include -#include -#include -#ifdef CONFIG_DEVFREQ_THERMAL -#include -#endif - -#include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) -#include -#else /* Linux >= 3.13 */ -/* In 3.13 the OPP include header file, types, and functions were all - * renamed. Use the old filename for the include, and define the new names to - * the old, when an old kernel is detected. - */ -#include -#define dev_pm_opp opp -#define dev_pm_opp_get_voltage opp_get_voltage -#define dev_pm_opp_get_opp_count opp_get_opp_count -#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil -#define dev_pm_opp_find_freq_floor opp_find_freq_floor -#endif /* Linux >= 3.13 */ - -/** - * opp_translate - Translate nominal OPP frequency from devicetree into real - * frequency and core mask - * @kbdev: Device pointer - * @freq: Nominal frequency - * @core_mask: Pointer to u64 to store core mask to - * - * Return: Real target frequency - * - * This function will only perform translation if an operating-points-v2-mali - * table is present in devicetree. If one is not present then it will return an - * untranslated frequency and all cores enabled. 
- */ -static unsigned long opp_translate(struct kbase_device *kbdev, - unsigned long freq, u64 *core_mask) -{ - int i; - - for (i = 0; i < kbdev->num_opps; i++) { - if (kbdev->opp_table[i].opp_freq == freq) { - *core_mask = kbdev->opp_table[i].core_mask; - return kbdev->opp_table[i].real_freq; - } - } - - /* Failed to find OPP - return all cores enabled & nominal frequency */ - *core_mask = kbdev->gpu_props.props.raw_props.shader_present; - - return freq; -} - -static int -kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) -{ - struct kbase_device *kbdev = dev_get_drvdata(dev); - struct dev_pm_opp *opp; - unsigned long nominal_freq; - unsigned long freq = 0; - unsigned long voltage; - int err; - u64 core_mask; - - freq = *target_freq; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - rcu_read_lock(); -#endif - opp = devfreq_recommended_opp(dev, &freq, flags); - voltage = dev_pm_opp_get_voltage(opp); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - rcu_read_unlock(); -#endif - if (IS_ERR_OR_NULL(opp)) { - dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); - return PTR_ERR(opp); - } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) - dev_pm_opp_put(opp); -#endif - - nominal_freq = freq; - - /* - * Only update if there is a change of frequency - */ - if (kbdev->current_nominal_freq == nominal_freq) { - *target_freq = nominal_freq; - return 0; - } - - freq = opp_translate(kbdev, nominal_freq, &core_mask); -#ifdef CONFIG_REGULATOR - if (kbdev->regulator && kbdev->current_voltage != voltage - && kbdev->current_freq < freq) { - err = regulator_set_voltage(kbdev->regulator, voltage, voltage); - if (err) { - dev_err(dev, "Failed to increase voltage (%d)\n", err); - return err; - } - } -#endif - - err = clk_set_rate(kbdev->clock, freq); - if (err) { - dev_err(dev, "Failed to set clock %lu (target %lu)\n", - freq, *target_freq); - return err; - } - -#ifdef CONFIG_REGULATOR - if (kbdev->regulator && kbdev->current_voltage != voltage - && kbdev->current_freq > freq) { - err = regulator_set_voltage(kbdev->regulator, voltage, voltage); - if (err) { - dev_err(dev, "Failed to decrease voltage (%d)\n", err); - return err; - } - } -#endif - - kbase_devfreq_set_core_mask(kbdev, core_mask); - - *target_freq = nominal_freq; - kbdev->current_voltage = voltage; - kbdev->current_nominal_freq = nominal_freq; - kbdev->current_freq = freq; - kbdev->current_core_mask = core_mask; - - KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq); - - return err; -} - -static int -kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) -{ - struct kbase_device *kbdev = dev_get_drvdata(dev); - - *freq = kbdev->current_nominal_freq; - - return 0; -} - -static int -kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) -{ - struct kbase_device *kbdev = dev_get_drvdata(dev); - struct kbasep_pm_metrics diff; - - kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff); - - stat->busy_time = diff.time_busy; - stat->total_time = diff.time_busy + diff.time_idle; - stat->current_frequency = kbdev->current_nominal_freq; - stat->private_data = NULL; - - return 0; -} - -static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, - struct devfreq_dev_profile *dp) -{ - int count; - int i = 0; - unsigned long freq; - struct dev_pm_opp *opp; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - rcu_read_lock(); -#endif - count = dev_pm_opp_get_opp_count(kbdev->dev); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - rcu_read_unlock(); -#endif - if 
(count < 0) - return count; - - dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), - GFP_KERNEL); - if (!dp->freq_table) - return -ENOMEM; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - rcu_read_lock(); -#endif - for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { - opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); - if (IS_ERR(opp)) - break; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) - dev_pm_opp_put(opp); -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */ - - dp->freq_table[i] = freq; - } -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) - rcu_read_unlock(); -#endif - - if (count != i) - dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", - count, i); - - dp->max_state = i; - - return 0; -} - -static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) -{ - struct devfreq_dev_profile *dp = kbdev->devfreq->profile; - - kfree(dp->freq_table); -} - -static void kbase_devfreq_exit(struct device *dev) -{ - struct kbase_device *kbdev = dev_get_drvdata(dev); - - kbase_devfreq_term_freq_table(kbdev); -} - -static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) -{ - struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, - "operating-points-v2", 0); - struct device_node *node; - int i = 0; - int count; - u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; - - if (!opp_node) - return 0; - if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) - return 0; - - count = dev_pm_opp_get_opp_count(kbdev->dev); - kbdev->opp_table = kmalloc_array(count, - sizeof(struct kbase_devfreq_opp), GFP_KERNEL); - if (!kbdev->opp_table) - return -ENOMEM; - - for_each_available_child_of_node(opp_node, node) { - u64 core_mask; - u64 opp_freq, real_freq; - const void *core_count_p; - - if (of_property_read_u64(node, "opp-hz", &opp_freq)) { - dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n"); - continue; - } - if (of_property_read_u64(node, "opp-hz-real", &real_freq)) - real_freq = opp_freq; - if (of_property_read_u64(node, "opp-core-mask", &core_mask)) - core_mask = shader_present; - if (core_mask != shader_present && - (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) || - corestack_driver_control || - platform_power_down_only)) { - - dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", - opp_freq); - continue; - } - - core_count_p = of_get_property(node, "opp-core-count", NULL); - if (core_count_p) { - u64 remaining_core_mask = - kbdev->gpu_props.props.raw_props.shader_present; - int core_count = be32_to_cpup(core_count_p); - - core_mask = 0; - - for (; core_count > 0; core_count--) { - int core = ffs(remaining_core_mask); - - if (!core) { - dev_err(kbdev->dev, "OPP has more cores than GPU\n"); - return -ENODEV; - } - - core_mask |= (1ull << (core-1)); - remaining_core_mask &= ~(1ull << (core-1)); - } - } - - if (!core_mask) { - dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); - return -ENODEV; - } - - kbdev->opp_table[i].opp_freq = opp_freq; - kbdev->opp_table[i].real_freq = real_freq; - kbdev->opp_table[i].core_mask = core_mask; - - dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n", - i, opp_freq, real_freq, core_mask); - - i++; - } - - kbdev->num_opps = i; - - return 0; -} - -int kbase_devfreq_init(struct kbase_device *kbdev) -{ - struct devfreq_dev_profile *dp; - int err; - - if (!kbdev->clock) { - dev_err(kbdev->dev, "Clock not available for devfreq\n"); - return -ENODEV; - } - - 
kbdev->current_freq = clk_get_rate(kbdev->clock); - kbdev->current_nominal_freq = kbdev->current_freq; - - dp = &kbdev->devfreq_profile; - - dp->initial_freq = kbdev->current_freq; - dp->polling_ms = 100; - dp->target = kbase_devfreq_target; - dp->get_dev_status = kbase_devfreq_status; - dp->get_cur_freq = kbase_devfreq_cur_freq; - dp->exit = kbase_devfreq_exit; - - if (kbase_devfreq_init_freq_table(kbdev, dp)) - return -EFAULT; - - if (dp->max_state > 0) { - /* Record the maximum frequency possible */ - kbdev->gpu_props.props.core_props.gpu_freq_khz_max = - dp->freq_table[0] / 1000; - }; - - err = kbase_devfreq_init_core_mask_table(kbdev); - if (err) - return err; - - kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, - "simple_ondemand", NULL); - if (IS_ERR(kbdev->devfreq)) { - kfree(dp->freq_table); - return PTR_ERR(kbdev->devfreq); - } - - /* devfreq_add_device only copies a few of kbdev->dev's fields, so - * set drvdata explicitly so IPA models can access kbdev. */ - dev_set_drvdata(&kbdev->devfreq->dev, kbdev); - - err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); - if (err) { - dev_err(kbdev->dev, - "Failed to register OPP notifier (%d)\n", err); - goto opp_notifier_failed; - } - -#ifdef CONFIG_DEVFREQ_THERMAL - err = kbase_ipa_init(kbdev); - if (err) { - dev_err(kbdev->dev, "IPA initialization failed\n"); - goto cooling_failed; - } - - kbdev->devfreq_cooling = of_devfreq_cooling_register_power( - kbdev->dev->of_node, - kbdev->devfreq, - &kbase_ipa_power_model_ops); - if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { - err = PTR_ERR(kbdev->devfreq_cooling); - dev_err(kbdev->dev, - "Failed to register cooling device (%d)\n", - err); - goto cooling_failed; - } -#endif - - return 0; - -#ifdef CONFIG_DEVFREQ_THERMAL -cooling_failed: - devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); -#endif /* CONFIG_DEVFREQ_THERMAL */ -opp_notifier_failed: - if (devfreq_remove_device(kbdev->devfreq)) - dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); - else - kbdev->devfreq = NULL; - - return err; -} - -void kbase_devfreq_term(struct kbase_device *kbdev) -{ - int err; - - dev_dbg(kbdev->dev, "Term Mali devfreq\n"); - -#ifdef CONFIG_DEVFREQ_THERMAL - if (kbdev->devfreq_cooling) - devfreq_cooling_unregister(kbdev->devfreq_cooling); - - kbase_ipa_term(kbdev); -#endif - - devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); - - err = devfreq_remove_device(kbdev->devfreq); - if (err) - dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); - else - kbdev->devfreq = NULL; - - kfree(kbdev->opp_table); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_devfreq.h deleted file mode 100755 index 0634038c5fee..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_devfreq.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _BASE_DEVFREQ_H_ -#define _BASE_DEVFREQ_H_ - -int kbase_devfreq_init(struct kbase_device *kbdev); -void kbase_devfreq_term(struct kbase_device *kbdev); - -#endif /* _BASE_DEVFREQ_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_device_hw.c deleted file mode 100755 index 5dd059fb3420..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_device_hw.c +++ /dev/null @@ -1,340 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * - */ -#include -#include -#include - -#include -#include - -#if !defined(CONFIG_MALI_NO_MALI) - - -#ifdef CONFIG_DEBUG_FS - - -int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) -{ - struct kbase_io_access *old_buf; - struct kbase_io_access *new_buf; - unsigned long flags; - - if (!new_size) - goto out_err; /* The new size must not be 0 */ - - new_buf = vmalloc(new_size * sizeof(*h->buf)); - if (!new_buf) - goto out_err; - - spin_lock_irqsave(&h->lock, flags); - - old_buf = h->buf; - - /* Note: we won't bother with copying the old data over. The dumping - * logic wouldn't work properly as it relies on 'count' both as a - * counter and as an index to the buffer which would have changed with - * the new array. This is a corner case that we don't need to support. 
- */ - h->count = 0; - h->size = new_size; - h->buf = new_buf; - - spin_unlock_irqrestore(&h->lock, flags); - - vfree(old_buf); - - return 0; - -out_err: - return -1; -} - - -int kbase_io_history_init(struct kbase_io_history *h, u16 n) -{ - h->enabled = false; - spin_lock_init(&h->lock); - h->count = 0; - h->size = 0; - h->buf = NULL; - if (kbase_io_history_resize(h, n)) - return -1; - - return 0; -} - - -void kbase_io_history_term(struct kbase_io_history *h) -{ - vfree(h->buf); - h->buf = NULL; -} - - -/* kbase_io_history_add - add new entry to the register access history - * - * @h: Pointer to the history data structure - * @addr: Register address - * @value: The value that is either read from or written to the register - * @write: 1 if it's a register write, 0 if it's a read - */ -static void kbase_io_history_add(struct kbase_io_history *h, - void __iomem const *addr, u32 value, u8 write) -{ - struct kbase_io_access *io; - unsigned long flags; - - spin_lock_irqsave(&h->lock, flags); - - io = &h->buf[h->count % h->size]; - io->addr = (uintptr_t)addr | write; - io->value = value; - ++h->count; - /* If count overflows, move the index by the buffer size so the entire - * buffer will still be dumped later */ - if (unlikely(!h->count)) - h->count = h->size; - - spin_unlock_irqrestore(&h->lock, flags); -} - - -void kbase_io_history_dump(struct kbase_device *kbdev) -{ - struct kbase_io_history *const h = &kbdev->io_history; - u16 i; - size_t iters; - unsigned long flags; - - if (!unlikely(h->enabled)) - return; - - spin_lock_irqsave(&h->lock, flags); - - dev_err(kbdev->dev, "Register IO History:"); - iters = (h->size > h->count) ? h->count : h->size; - dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, - h->count); - for (i = 0; i < iters; ++i) { - struct kbase_io_access *io = - &h->buf[(h->count - iters + i) % h->size]; - char const access = (io->addr & 1) ? 'w' : 'r'; - - dev_err(kbdev->dev, "%6i: %c: reg 0x%p val %08x\n", i, access, - (void *)(io->addr & ~0x1), io->value); - } - - spin_unlock_irqrestore(&h->lock, flags); -} - - -#endif /* CONFIG_DEBUG_FS */ - - -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) -{ - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->dev != NULL); - - writel(value, kbdev->reg + offset); - -#ifdef CONFIG_DEBUG_FS - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - value, 1); -#endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); -} - -KBASE_EXPORT_TEST_API(kbase_reg_write); - -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) -{ - u32 val; - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - KBASE_DEBUG_ASSERT(kbdev->dev != NULL); - - val = readl(kbdev->reg + offset); - -#ifdef CONFIG_DEBUG_FS - if (unlikely(kbdev->io_history.enabled)) - kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, - val, 0); -#endif /* CONFIG_DEBUG_FS */ - dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); - - return val; -} - -KBASE_EXPORT_TEST_API(kbase_reg_read); -#endif /* !defined(CONFIG_MALI_NO_MALI) */ - -/** - * kbase_report_gpu_fault - Report a GPU fault. - * @kbdev: Kbase device pointer - * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS - * was also set - * - * This function is called from the interrupt handler when a GPU fault occurs. - * It reports the details of the fault using dev_warn(). 
- */ -static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) -{ - u32 status; - u64 address; - - status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); - address = (u64) kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; - address |= kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); - - dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", - status & 0xFF, - kbase_exception_name(kbdev, status), - address); - if (multiple) - dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); -} - -void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) -{ - u32 irq_mask; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbdev->cache_clean_in_progress) { - /* If this is called while another clean is in progress, we - * can't rely on the current one to flush any new changes in - * the cache. Instead, trigger another cache clean immediately - * after this one finishes. - */ - kbdev->cache_clean_queued = true; - return; - } - - /* Enable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask | CLEAN_CACHES_COMPLETED); - - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES); - - kbdev->cache_clean_in_progress = true; -} - -void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_gpu_start_cache_clean_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -static void kbase_clean_caches_done(struct kbase_device *kbdev) -{ - u32 irq_mask; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (kbdev->cache_clean_queued) { - kbdev->cache_clean_queued = false; - - KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES); - } else { - /* Disable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~CLEAN_CACHES_COMPLETED); - - kbdev->cache_clean_in_progress = false; - - wake_up(&kbdev->cache_clean_wait); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - while (kbdev->cache_clean_in_progress) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - wait_event_interruptible(kbdev->cache_clean_wait, - !kbdev->cache_clean_in_progress); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) -{ - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); - if (val & GPU_FAULT) - kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); - - if (val & RESET_COMPLETED) - kbase_pm_reset_done(kbdev); - - if (val & PRFCNT_SAMPLE_COMPLETED) - kbase_instr_hwcnt_sample_done(kbdev); - - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); - - /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must - * be called after the IRQ has been cleared. 
This is because it might - * trigger further power transitions and we don't want to miss the - * interrupt raised to notify us that these further transitions have - * finished. The same applies to kbase_clean_caches_done() - if another - * clean was queued, it might trigger another clean, which might - * generate another interrupt which shouldn't be missed. - */ - - if (val & CLEAN_CACHES_COMPLETED) - kbase_clean_caches_done(kbdev); - - /* When 'platform_power_down_only' is enabled, the L2 cache is not - * powered down, but flushed before the GPU power down (which is done - * by the platform code). So the L2 state machine requests a cache - * flush. And when that flush completes, the L2 state machine needs to - * be re-invoked to proceed with the GPU power down. - */ - if (val & POWER_CHANGED_ALL || - (platform_power_down_only && (val & CLEAN_CACHES_COMPLETED))) - kbase_pm_power_changed(kbdev); - - KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_device_internal.h deleted file mode 100755 index 7886e96dd90f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_device_internal.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Backend-specific HW access device APIs - */ - -#ifndef _KBASE_DEVICE_INTERNAL_H_ -#define _KBASE_DEVICE_INTERNAL_H_ - -/** - * kbase_reg_write - write to GPU register - * @kbdev: Kbase device pointer - * @offset: Offset of register - * @value: Value to write - * - * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). - */ -void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); - -/** - * kbase_reg_read - read from GPU register - * @kbdev: Kbase device pointer - * @offset: Offset of register - * - * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). - * - * Return: Value in desired register - */ -u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); - -/** - * kbase_gpu_start_cache_clean - Start a cache clean - * @kbdev: Kbase device - * - * Issue a cache clean and invalidate command to hardware. This function will - * take hwaccess_lock. - */ -void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); - -/** - * kbase_gpu_start_cache_clean_nolock - Start a cache clean - * @kbdev: Kbase device - * - * Issue a cache clean and invalidate command to hardware. hwaccess_lock - * must be held by the caller. 
- */ -void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); - -/** - * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish - * @kbdev: Kbase device - * - * This function will take hwaccess_lock, and may sleep. - */ -void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); - -/** - * kbase_gpu_interrupt - GPU interrupt handler - * @kbdev: Kbase device pointer - * @val: The value of the GPU IRQ status register which triggered the call - * - * This function is called from the interrupt handler when a GPU irq is to be - * handled. - */ -void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); - -#endif /* _KBASE_DEVICE_INTERNAL_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_gpu.c deleted file mode 100755 index 995d34da0c6f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_gpu.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend APIs - */ -#include -#include -#include -#include -#include -#include - -int kbase_backend_early_init(struct kbase_device *kbdev) -{ - int err; - - err = kbasep_platform_device_init(kbdev); - if (err) - return err; - - err = kbase_pm_runtime_init(kbdev); - if (err) - goto fail_runtime_pm; - - /* Ensure we can access the GPU registers */ - kbase_pm_register_access_enable(kbdev); - - /* Find out GPU properties based on the GPU feature registers */ - kbase_gpuprops_set(kbdev); - - /* We're done accessing the GPU registers for now. 
*/ - kbase_pm_register_access_disable(kbdev); - - err = kbase_install_interrupts(kbdev); - if (err) - goto fail_interrupts; - - err = kbase_hwaccess_pm_early_init(kbdev); - if (err) - goto fail_pm; - - return 0; - -fail_pm: - kbase_release_interrupts(kbdev); -fail_interrupts: - kbase_pm_runtime_term(kbdev); -fail_runtime_pm: - kbasep_platform_device_term(kbdev); - - return err; -} - -void kbase_backend_early_term(struct kbase_device *kbdev) -{ - kbase_hwaccess_pm_early_term(kbdev); - kbase_release_interrupts(kbdev); - kbase_pm_runtime_term(kbdev); - kbasep_platform_device_term(kbdev); -} - -int kbase_backend_late_init(struct kbase_device *kbdev) -{ - int err; - - err = kbase_hwaccess_pm_late_init(kbdev); - if (err) - return err; - - err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); - if (err) - goto fail_pm_powerup; - - err = kbase_backend_timer_init(kbdev); - if (err) - goto fail_timer; - -#ifdef CONFIG_MALI_DEBUG -#ifndef CONFIG_MALI_NO_MALI - if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { - dev_err(kbdev->dev, "Interrupt assigment check failed.\n"); - err = -EINVAL; - goto fail_interrupt_test; - } -#endif /* !CONFIG_MALI_NO_MALI */ -#endif /* CONFIG_MALI_DEBUG */ - - err = kbase_job_slot_init(kbdev); - if (err) - goto fail_job_slot; - - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); - - return 0; - -fail_job_slot: - -#ifdef CONFIG_MALI_DEBUG -#ifndef CONFIG_MALI_NO_MALI -fail_interrupt_test: -#endif /* !CONFIG_MALI_NO_MALI */ -#endif /* CONFIG_MALI_DEBUG */ - - kbase_backend_timer_term(kbdev); -fail_timer: - kbase_hwaccess_pm_halt(kbdev); -fail_pm_powerup: - kbase_hwaccess_pm_late_term(kbdev); - - return err; -} - -void kbase_backend_late_term(struct kbase_device *kbdev) -{ - kbase_job_slot_halt(kbdev); - kbase_job_slot_term(kbdev); - kbase_backend_timer_term(kbdev); - kbase_hwaccess_pm_halt(kbdev); - kbase_hwaccess_pm_late_term(kbdev); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_gpuprops_backend.c deleted file mode 100755 index 39773e6e63aa..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel property query backend APIs - */ - -#include -#include -#include -#include - -void kbase_backend_gpuprops_get(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) -{ - int i; - - /* Fill regdump with the content of the relevant registers */ - regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); - - regdump->l2_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_FEATURES)); - regdump->core_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CORE_FEATURES)); - regdump->tiler_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_FEATURES)); - regdump->mem_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MEM_FEATURES)); - regdump->mmu_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(MMU_FEATURES)); - regdump->as_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(AS_PRESENT)); - regdump->js_present = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_PRESENT)); - - for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) - regdump->js_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JS_FEATURES_REG(i))); - - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - regdump->texture_features[i] = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); - - regdump->thread_max_threads = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_THREADS)); - regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); - regdump->thread_max_barrier_size = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); - regdump->thread_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_FEATURES)); - regdump->thread_tls_alloc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(THREAD_TLS_ALLOC)); - - regdump->shader_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_LO)); - regdump->shader_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_PRESENT_HI)); - - regdump->tiler_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_LO)); - regdump->tiler_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_PRESENT_HI)); - - regdump->l2_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_LO)); - regdump->l2_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_PRESENT_HI)); - - regdump->stack_present_lo = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_LO)); - regdump->stack_present_hi = kbase_reg_read(kbdev, - GPU_CONTROL_REG(STACK_PRESENT_HI)); -} - -void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump) -{ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { - /* Ensure we can access the GPU registers */ - kbase_pm_register_access_enable(kbdev); - - regdump->coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES)); - - /* We're done accessing the GPU registers for now. */ - kbase_pm_register_access_disable(kbdev); - } else { - /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ - regdump->coherency_features = - COHERENCY_FEATURE_BIT(COHERENCY_NONE) | - COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); - } -} - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_backend.c deleted file mode 100755 index 79c04d9abaef..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_backend.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * GPU backend instrumentation APIs. - */ - -#include -#include -#include -#include -#include -#include - -int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_instr_hwcnt_enable *enable) -{ - unsigned long flags; - int err = -EINVAL; - u32 irq_mask; - u32 prfcnt_config; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* alignment failure */ - if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) - goto out_err; - - /* Override core availability policy to ensure all cores are available - */ - kbase_pm_ca_instr_enable(kbdev); - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { - /* Instrumentation is already enabled */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - goto out_err; - } - - /* Enable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | - PRFCNT_SAMPLE_COMPLETED); - - /* In use, this context is the owner */ - kbdev->hwcnt.kctx = kctx; - /* Remember the dump address so we can reprogram it later */ - kbdev->hwcnt.addr = enable->dump_buffer; - kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - /* Configure */ - prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; - if (enable->use_secondary) - { - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) - >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); - - if (arch_v6) - prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; - } - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_OFF); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - enable->dump_buffer & 0xFFFFFFFF); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - enable->dump_buffer >> 32); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - enable->jm_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), - enable->shader_bm); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), - enable->mmu_l2_bm); - /* Due to PRLAM-8186 we need to disable the Tiler before we enable the - * HW counter dump. 
*/ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0); - else - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); - - /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump - */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), - enable->tiler_bm); - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - err = 0; - - dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); - return err; - out_err: - return err; -} - -int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) -{ - unsigned long flags, pm_flags; - int err = -EINVAL; - u32 irq_mask; - struct kbase_device *kbdev = kctx->kbdev; - - while (1) { - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { - /* Instrumentation is not enabled */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - goto out; - } - - if (kbdev->hwcnt.kctx != kctx) { - /* Instrumentation has been setup for another context */ - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - goto out; - } - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) - break; - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - /* Ongoing dump/setup - wait for its completion */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - } - - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; - kbdev->hwcnt.backend.triggered = 0; - - /* Disable interrupt */ - irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), - irq_mask & ~PRFCNT_SAMPLE_COMPLETED); - - /* Disable the counters */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); - - kbdev->hwcnt.kctx = NULL; - kbdev->hwcnt.addr = 0ULL; - kbdev->hwcnt.addr_bytes = 0ULL; - - kbase_pm_ca_instr_disable(kbdev); - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", - kctx); - - err = 0; - - out: - return err; -} - -int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) -{ - unsigned long flags; - int err = -EINVAL; - struct kbase_device *kbdev = kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.kctx != kctx) { - /* The instrumentation has been setup for another context */ - goto unlock; - } - - if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { - /* HW counters are disabled or another dump is ongoing, or we're - * resetting */ - goto unlock; - } - - kbdev->hwcnt.backend.triggered = 0; - - /* Mark that we're dumping - the PF handler can signal that we faulted - */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; - - /* Reconfigure the dump address */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), - kbdev->hwcnt.addr & 0xFFFFFFFF); - 
kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), - kbdev->hwcnt.addr >> 32); - - /* Start dumping */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, - kbdev->hwcnt.addr, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_SAMPLE); - - dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); - - err = 0; - - unlock: - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; -} -KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); - -bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, - bool * const success) -{ - unsigned long flags; - bool complete = false; - struct kbase_device *kbdev = kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { - *success = true; - complete = true; - } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { - *success = false; - complete = true; - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - } - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - return complete; -} -KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); - -void kbasep_cache_clean_worker(struct work_struct *data) -{ - struct kbase_device *kbdev; - unsigned long flags, pm_flags; - - kbdev = container_of(data, struct kbase_device, - hwcnt.backend.cache_clean_work); - - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - /* Clean and invalidate the caches so we're sure the mmu tables for the - * dump buffer is valid. - */ - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_REQUEST_CLEAN); - kbase_gpu_start_cache_clean_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - kbase_gpu_wait_cache_clean(kbdev); - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_REQUEST_CLEAN); - /* All finished and idle */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -} - -void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { - if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { - /* All finished and idle */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - } else { - int ret; - /* Always clean and invalidate the cache after a successful dump - */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; - ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, - &kbdev->hwcnt.backend.cache_clean_work); - KBASE_DEBUG_ASSERT(ret); - } - } - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -} - -int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - unsigned long flags; - int err; - - /* Wait for dump & cache clean to complete */ - wait_event(kbdev->hwcnt.backend.wait, - kbdev->hwcnt.backend.triggered != 0); - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { - err = -EINVAL; - 
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - } else { - /* Dump done */ - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_IDLE); - err = 0; - } - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - - return err; -} - -int kbase_instr_hwcnt_clear(struct kbase_context *kctx) -{ - unsigned long flags; - int err = -EINVAL; - struct kbase_device *kbdev = kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - /* Check it's the context previously set up and we're not already - * dumping */ - if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_IDLE) - goto out; - - /* Clear the counters */ - KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_PRFCNT_CLEAR); - - err = 0; - -out: - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; -} -KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); - -int kbase_instr_backend_init(struct kbase_device *kbdev) -{ - int ret = 0; - - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; - - init_waitqueue_head(&kbdev->hwcnt.backend.wait); - INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, - kbasep_cache_clean_worker); - kbdev->hwcnt.backend.triggered = 0; - - kbdev->hwcnt.backend.cache_clean_wq = - alloc_workqueue("Mali cache cleaning workqueue", 0, 1); - if (NULL == kbdev->hwcnt.backend.cache_clean_wq) - ret = -EINVAL; - - return ret; -} - -void kbase_instr_backend_term(struct kbase_device *kbdev) -{ - destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_defs.h deleted file mode 100755 index c9fb7593a936..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_defs.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Backend-specific instrumentation definitions - */ - -#ifndef _KBASE_INSTR_DEFS_H_ -#define _KBASE_INSTR_DEFS_H_ - -/* - * Instrumentation State Machine States - */ -enum kbase_instr_state { - /* State where instrumentation is not active */ - KBASE_INSTR_STATE_DISABLED = 0, - /* State machine is active and ready for a command. */ - KBASE_INSTR_STATE_IDLE, - /* Hardware is currently dumping a frame. */ - KBASE_INSTR_STATE_DUMPING, - /* We've requested a clean to occur on a workqueue */ - KBASE_INSTR_STATE_REQUEST_CLEAN, - /* An error has occured during DUMPING (page fault). 
*/ - KBASE_INSTR_STATE_FAULT -}; - -/* Structure used for instrumentation and HW counters dumping */ -struct kbase_instr_backend { - wait_queue_head_t wait; - int triggered; - - enum kbase_instr_state state; - struct workqueue_struct *cache_clean_wq; - struct work_struct cache_clean_work; -}; - -#endif /* _KBASE_INSTR_DEFS_H_ */ - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_internal.h deleted file mode 100755 index 2254b9f30d02..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_instr_internal.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Backend-specific HW access instrumentation APIs - */ - -#ifndef _KBASE_INSTR_INTERNAL_H_ -#define _KBASE_INSTR_INTERNAL_H_ - -/** - * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning - * @data: a &struct work_struct - */ -void kbasep_cache_clean_worker(struct work_struct *data); - -/** - * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received - * @kbdev: Kbase device - */ -void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); - -#endif /* _KBASE_INSTR_INTERNAL_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_irq_internal.h deleted file mode 100755 index ca3c048b637a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_irq_internal.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Backend specific IRQ APIs - */ - -#ifndef _KBASE_IRQ_INTERNAL_H_ -#define _KBASE_IRQ_INTERNAL_H_ - -int kbase_install_interrupts(struct kbase_device *kbdev); - -void kbase_release_interrupts(struct kbase_device *kbdev); - -/** - * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed - * execution - * @kbdev: The kbase device - */ -void kbase_synchronize_irqs(struct kbase_device *kbdev); - -int kbasep_common_test_interrupt_handlers( - struct kbase_device * const kbdev); - -#endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_irq_linux.c deleted file mode 100755 index dd0279a03abc..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_irq_linux.c +++ /dev/null @@ -1,474 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include - -#include - -#if !defined(CONFIG_MALI_NO_MALI) - -/* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 - -static void *kbase_tag(void *ptr, u32 tag) -{ - return (void *)(((uintptr_t) ptr) | tag); -} - -static void *kbase_untag(void *ptr) -{ - return (void *)(((uintptr_t) ptr) & ~3); -} - -static irqreturn_t kbase_job_irq_handler(int irq, void *data) -{ - unsigned long flags; - struct kbase_device *kbdev = kbase_untag(data); - u32 val; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!kbdev->pm.backend.gpu_powered) { - /* GPU is turned off - IRQ is not for us */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - flags); - return IRQ_NONE; - } - - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); - -#ifdef CONFIG_MALI_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); -#endif /* CONFIG_MALI_DEBUG */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!val) - return IRQ_NONE; - - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); - - kbase_job_done(kbdev, val); - - return IRQ_HANDLED; -} - -KBASE_EXPORT_TEST_API(kbase_job_irq_handler); - -static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) -{ - unsigned long flags; - struct kbase_device *kbdev = kbase_untag(data); - u32 val; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!kbdev->pm.backend.gpu_powered) { - /* GPU is turned off - IRQ is not for us */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - flags); - return IRQ_NONE; - } - - atomic_inc(&kbdev->faults_pending); - - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); - -#ifdef 
CONFIG_MALI_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); -#endif /* CONFIG_MALI_DEBUG */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!val) { - atomic_dec(&kbdev->faults_pending); - return IRQ_NONE; - } - - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); - - kbase_mmu_interrupt(kbdev, val); - - atomic_dec(&kbdev->faults_pending); - - return IRQ_HANDLED; -} - -static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) -{ - unsigned long flags; - struct kbase_device *kbdev = kbase_untag(data); - u32 val; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!kbdev->pm.backend.gpu_powered) { - /* GPU is turned off - IRQ is not for us */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - flags); - return IRQ_NONE; - } - - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); - -#ifdef CONFIG_MALI_DEBUG - if (!kbdev->pm.backend.driver_ready_for_irqs) - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", - __func__, irq, val); -#endif /* CONFIG_MALI_DEBUG */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!val) - return IRQ_NONE; - - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); - - kbase_gpu_interrupt(kbdev, val); - - return IRQ_HANDLED; -} - -KBASE_EXPORT_TEST_API(kbase_gpu_irq_handler); - -static irq_handler_t kbase_handler_table[] = { - [JOB_IRQ_TAG] = kbase_job_irq_handler, - [MMU_IRQ_TAG] = kbase_mmu_irq_handler, - [GPU_IRQ_TAG] = kbase_gpu_irq_handler, -}; - -#ifdef CONFIG_MALI_DEBUG -#define JOB_IRQ_HANDLER JOB_IRQ_TAG -#define MMU_IRQ_HANDLER MMU_IRQ_TAG -#define GPU_IRQ_HANDLER GPU_IRQ_TAG - -/** - * kbase_set_custom_irq_handler - Set a custom IRQ handler - * @kbdev: Device for which the handler is to be registered - * @custom_handler: Handler to be registered - * @irq_type: Interrupt type - * - * Registers given interrupt handler for requested interrupt type - * In the case where irq handler is not specified, the default handler shall be - * registered - * - * Return: 0 case success, error code otherwise - */ -int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type) -{ - int result = 0; - irq_handler_t requested_irq_handler = NULL; - - KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && - (GPU_IRQ_HANDLER >= irq_type)); - - /* Release previous handler */ - if (kbdev->irqs[irq_type].irq) - free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); - - requested_irq_handler = (NULL != custom_handler) ? 
custom_handler : - kbase_handler_table[irq_type]; - - if (0 != request_irq(kbdev->irqs[irq_type].irq, - requested_irq_handler, - kbdev->irqs[irq_type].flags | IRQF_SHARED, - dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { - result = -EINVAL; - dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", - kbdev->irqs[irq_type].irq, irq_type); -#ifdef CONFIG_SPARSE_IRQ - dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -#endif /* CONFIG_SPARSE_IRQ */ - } - - return result; -} - -KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); - -/* test correct interrupt assigment and reception by cpu */ -struct kbasep_irq_test { - struct hrtimer timer; - wait_queue_head_t wait; - int triggered; - u32 timeout; -}; - -static struct kbasep_irq_test kbasep_irq_test_data; - -#define IRQ_TEST_TIMEOUT 500 - -static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) -{ - unsigned long flags; - struct kbase_device *kbdev = kbase_untag(data); - u32 val; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!kbdev->pm.backend.gpu_powered) { - /* GPU is turned off - IRQ is not for us */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - flags); - return IRQ_NONE; - } - - val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); - - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!val) - return IRQ_NONE; - - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); - - kbasep_irq_test_data.triggered = 1; - wake_up(&kbasep_irq_test_data.wait); - - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); - - return IRQ_HANDLED; -} - -static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) -{ - unsigned long flags; - struct kbase_device *kbdev = kbase_untag(data); - u32 val; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!kbdev->pm.backend.gpu_powered) { - /* GPU is turned off - IRQ is not for us */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - flags); - return IRQ_NONE; - } - - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); - - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (!val) - return IRQ_NONE; - - dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); - - kbasep_irq_test_data.triggered = 1; - wake_up(&kbasep_irq_test_data.wait); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); - - return IRQ_HANDLED; -} - -static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) -{ - struct kbasep_irq_test *test_data = container_of(timer, - struct kbasep_irq_test, timer); - - test_data->timeout = 1; - test_data->triggered = 1; - wake_up(&test_data->wait); - return HRTIMER_NORESTART; -} - -static int kbasep_common_test_interrupt( - struct kbase_device * const kbdev, u32 tag) -{ - int err = 0; - irq_handler_t test_handler; - - u32 old_mask_val; - u16 mask_offset; - u16 rawstat_offset; - - switch (tag) { - case JOB_IRQ_TAG: - test_handler = kbase_job_irq_test_handler; - rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); - mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); - break; - case MMU_IRQ_TAG: - test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_REG(MMU_IRQ_MASK); - break; - case GPU_IRQ_TAG: - /* already tested by pm_driver - bail out */ - default: - return 0; - } - - /* store old mask */ - old_mask_val = kbase_reg_read(kbdev, mask_offset); - /* mask 
interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0); - - if (kbdev->irqs[tag].irq) { - /* release original handler and install test handler */ - if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { - err = -EINVAL; - } else { - kbasep_irq_test_data.timeout = 0; - hrtimer_init(&kbasep_irq_test_data.timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - kbasep_irq_test_data.timer.function = - kbasep_test_interrupt_timeout; - - /* trigger interrupt */ - kbase_reg_write(kbdev, mask_offset, 0x1); - kbase_reg_write(kbdev, rawstat_offset, 0x1); - - hrtimer_start(&kbasep_irq_test_data.timer, - HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), - HRTIMER_MODE_REL); - - wait_event(kbasep_irq_test_data.wait, - kbasep_irq_test_data.triggered != 0); - - if (kbasep_irq_test_data.timeout != 0) { - dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", - kbdev->irqs[tag].irq, tag); - err = -EINVAL; - } else { - dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", - kbdev->irqs[tag].irq, tag); - } - - hrtimer_cancel(&kbasep_irq_test_data.timer); - kbasep_irq_test_data.triggered = 0; - - /* mask interrupts */ - kbase_reg_write(kbdev, mask_offset, 0x0); - - /* release test handler */ - free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); - } - - /* restore original interrupt */ - if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], - kbdev->irqs[tag].flags | IRQF_SHARED, - dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { - dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", - kbdev->irqs[tag].irq, tag); - err = -EINVAL; - } - } - /* restore old mask */ - kbase_reg_write(kbdev, mask_offset, old_mask_val); - - return err; -} - -int kbasep_common_test_interrupt_handlers( - struct kbase_device * const kbdev) -{ - int err; - - init_waitqueue_head(&kbasep_irq_test_data.wait); - kbasep_irq_test_data.triggered = 0; - - /* A suspend won't happen during startup/insmod */ - kbase_pm_context_active(kbdev); - - err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); - if (err) { - dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); - goto out; - } - - err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); - if (err) { - dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); - goto out; - } - - dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); - - out: - kbase_pm_context_idle(kbdev); - - return err; -} -#endif /* CONFIG_MALI_DEBUG */ - -int kbase_install_interrupts(struct kbase_device *kbdev) -{ - u32 nr = ARRAY_SIZE(kbase_handler_table); - int err; - u32 i; - - for (i = 0; i < nr; i++) { - err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], - kbdev->irqs[i].flags | IRQF_SHARED, - dev_name(kbdev->dev), - kbase_tag(kbdev, i)); - if (err) { - dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", - kbdev->irqs[i].irq, i); -#ifdef CONFIG_SPARSE_IRQ - dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); -#endif /* CONFIG_SPARSE_IRQ */ - goto release; - } - } - - return 0; - - release: - while (i-- > 0) - free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); - - return err; -} - -void kbase_release_interrupts(struct kbase_device *kbdev) -{ - u32 nr = ARRAY_SIZE(kbase_handler_table); - u32 i; - - for (i = 0; i < nr; i++) { - if (kbdev->irqs[i].irq) - free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); - } -} - -void kbase_synchronize_irqs(struct kbase_device *kbdev) -{ - u32 nr = ARRAY_SIZE(kbase_handler_table); - u32 i; - - for (i = 0; i < nr; i++) { - if (kbdev->irqs[i].irq) - synchronize_irq(kbdev->irqs[i].irq); - } -} - -#endif /* !defined(CONFIG_MALI_NO_MALI) */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_as.c deleted file mode 100755 index c8153ba4c121..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_as.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register backend context / address space management - */ - -#include -#include -#include - -/** - * assign_and_activate_kctx_addr_space - Assign an AS to a context - * @kbdev: Kbase device - * @kctx: Kbase context - * @current_as: Address Space to assign - * - * Assign an Address Space (AS) to a context, and add the context to the Policy. - * - * This includes - * setting up the global runpool_irq structure and the context on the AS, - * Activating the MMU on the AS, - * Allowing jobs to be submitted on the AS. 
- * - * Context: - * kbasep_js_kctx_info.jsctx_mutex held, - * kbasep_js_device_data.runpool_mutex held, - * AS transaction mutex held, - * Runpool IRQ lock held - */ -static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_as *current_as) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* Attribute handling */ - kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); - - /* Allow it to run jobs */ - kbasep_js_set_submit_allowed(js_devdata, kctx); - - kbase_js_runpool_inc_context_count(kbdev, kctx); -} - -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - int i; - - if (kbdev->hwaccess.active_kctx[js] == kctx) { - /* Context is already active */ - return true; - } - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - if (kbdev->as_to_kctx[i] == kctx) { - /* Context already has ASID - mark as active */ - return true; - } - } - - /* Context does not have address space assigned */ - return false; -} - -void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - int as_nr = kctx->as_nr; - - if (as_nr == KBASEP_AS_NR_INVALID) { - WARN(1, "Attempting to release context without ASID\n"); - return; - } - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (atomic_read(&kctx->refcount) != 1) { - WARN(1, "Attempting to release active ASID\n"); - return; - } - - kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); - - kbase_ctx_sched_release_ctx(kctx); - kbase_js_runpool_dec_context_count(kbdev, kctx); -} - -void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ -} - -int kbase_backend_find_and_release_free_address_space( - struct kbase_device *kbdev, struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - unsigned long flags; - int i; - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbasep_js_kctx_info *as_js_kctx_info; - struct kbase_context *as_kctx; - - as_kctx = kbdev->as_to_kctx[i]; - as_js_kctx_info = &as_kctx->jctx.sched_info; - - /* Don't release privileged or active contexts, or contexts with - * jobs running. - * Note that a context will have at least 1 reference (which - * was previously taken by kbasep_js_schedule_ctx()) until - * descheduled. 
- */ - if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && - atomic_read(&as_kctx->refcount) == 1) { - if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, - as_kctx)) { - WARN(1, "Failed to retain active context\n"); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - return KBASEP_AS_NR_INVALID; - } - - kbasep_js_clear_submit_allowed(js_devdata, as_kctx); - - /* Drop and retake locks to take the jsctx_mutex on the - * context we're about to release without violating lock - * ordering - */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - - /* Release context from address space */ - mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - - kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); - - if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, - as_kctx, - true); - - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); - - return i; - } - - /* Context was retained while locks were dropped, - * continue looking for free AS */ - - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - } - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - return KBASEP_AS_NR_INVALID; -} - -bool kbase_backend_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int as_nr) -{ - struct kbasep_js_device_data *js_devdata; - struct kbase_as *new_address_space = NULL; - int js; - - js_devdata = &kbdev->js_data; - - for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - if (kbdev->hwaccess.active_kctx[js] == kctx) { - WARN(1, "Context is already scheduled in\n"); - return false; - } - } - - new_address_space = &kbdev->as[as_nr]; - - lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); - - if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { - /* We need to retain it to keep the corresponding address space - */ - kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - } - - return true; -} - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_defs.h deleted file mode 100755 index b4d2ae1cc4e8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_defs.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2016, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend specific definitions - */ - -#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ -#define _KBASE_HWACCESS_GPU_DEFS_H_ - -/* SLOT_RB_SIZE must be < 256 */ -#define SLOT_RB_SIZE 2 -#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) - -/** - * struct rb_entry - Ringbuffer entry - * @katom: Atom associated with this entry - */ -struct rb_entry { - struct kbase_jd_atom *katom; -}; - -/** - * struct slot_rb - Slot ringbuffer - * @entries: Ringbuffer entries - * @last_context: The last context to submit a job on this slot - * @read_idx: Current read index of buffer - * @write_idx: Current write index of buffer - * @job_chain_flag: Flag used to implement jobchain disambiguation - */ -struct slot_rb { - struct rb_entry entries[SLOT_RB_SIZE]; - - struct kbase_context *last_context; - - u8 read_idx; - u8 write_idx; - - u8 job_chain_flag; -}; - -/** - * struct kbase_backend_data - GPU backend specific data for HW access layer - * @slot_rb: Slot ringbuffers - * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines - * whether slots 0/1 or slot 2 are currently being - * pulled from - * @scheduling_timer: The timer tick used for rescheduling jobs - * @timer_running: Is the timer running? The runpool_mutex must be - * held whilst modifying this. - * @suspend_timer: Is the timer suspended? Set when a suspend - * occurs and cleared on resume. The runpool_mutex - * must be held whilst modifying this. - * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) - * @reset_workq: Work queue for performing the reset - * @reset_work: Work item for performing the reset - * @reset_wait: Wait event signalled when the reset is complete - * @reset_timer: Timeout for soft-stops before the reset - * @timeouts_updated: Have timeout values just been updated? - * - * The hwaccess_lock (a spinlock) must be held when accessing this structure - */ -struct kbase_backend_data { - struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; - - bool rmu_workaround_flag; - - struct hrtimer scheduling_timer; - - bool timer_running; - bool suspend_timer; - - atomic_t reset_gpu; - -/* The GPU reset isn't pending */ -#define KBASE_RESET_GPU_NOT_PENDING 0 -/* kbase_prepare_to_reset_gpu has been called */ -#define KBASE_RESET_GPU_PREPARED 1 -/* kbase_reset_gpu has been called - the reset will now definitely happen - * within the timeout period */ -#define KBASE_RESET_GPU_COMMITTED 2 -/* The GPU reset process is currently occuring (timeout has expired or - * kbasep_try_reset_gpu_early was called) */ -#define KBASE_RESET_GPU_HAPPENING 3 -/* Reset the GPU silently, used when resetting the GPU as part of normal - * behavior (e.g. when exiting protected mode). */ -#define KBASE_RESET_GPU_SILENT 4 - struct workqueue_struct *reset_workq; - struct work_struct reset_work; - wait_queue_head_t reset_wait; - struct hrtimer reset_timer; - - bool timeouts_updated; -}; - -#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_hw.c deleted file mode 100755 index acd4a5aff94b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_hw.c +++ /dev/null @@ -1,1465 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Base kernel job manager APIs - */ - -#include -#include -#include -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#define beenthere(kctx, f, a...) \ - dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) - -static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); -static void kbasep_reset_timeout_worker(struct work_struct *data); -static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); - -static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, - struct kbase_context *kctx) -{ - return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); -} - -static u64 kbase_job_write_affinity(struct kbase_device *kbdev, - base_jd_core_req core_req, - int js) -{ - u64 affinity; - - if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == - BASE_JD_REQ_T) { - /* Tiler-only atom */ - /* If the hardware supports XAFFINITY then we'll only enable - * the tiler (which is the default so this is a no-op), - * otherwise enable shader core 0. - */ - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) - affinity = 1; - else - affinity = 0; - } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { - unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; - struct mali_base_gpu_coherent_group_info *coherency_info = - &kbdev->gpu_props.props.coherency_info; - - affinity = kbdev->pm.backend.shaders_avail & - kbdev->pm.debug_core_mask[js]; - - /* JS2 on a dual core group system targets core group 1. All - * other cases target core group 0. 
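The affinity handling above boils down to narrowing a 64-bit core mask and then programming it as two 32-bit halves. A standalone sketch of that idiom follows; the mask names and the printf stand-in for the register accessor are illustrative, not the driver's own interface:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for a 32-bit register write; the real driver writes the
 * affinity LO/HI pair through its own register accessor. */
static void write_reg32(const char *name, uint32_t value)
{
	printf("%s = 0x%08x\n", name, (unsigned int)value);
}

/* Narrow the available-shader mask by the per-slot debug mask and an
 * (optional) core-group mask, then split it into two 32-bit halves. */
static uint64_t write_affinity(uint64_t shaders_avail, uint64_t debug_mask,
			       uint64_t group_mask)
{
	uint64_t affinity = shaders_avail & debug_mask & group_mask;

	write_reg32("AFFINITY_NEXT_LO", (uint32_t)(affinity & 0xFFFFFFFF));
	write_reg32("AFFINITY_NEXT_HI", (uint32_t)(affinity >> 32));
	return affinity;
}

int main(void)
{
	/* 8 shader cores available, no debug restriction, and a core
	 * group that owns the lower 4 cores. */
	write_affinity(0xFF, ~0ULL, 0x0F);
	return 0;
}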
- */ - if (js == 2 && num_core_groups > 1) - affinity &= coherency_info->group[1].core_mask; - else - affinity &= coherency_info->group[0].core_mask; - } else { - /* Use all cores */ - affinity = kbdev->pm.backend.shaders_avail & - kbdev->pm.debug_core_mask[js]; - } - - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), - affinity & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), - affinity >> 32); - - return affinity; -} - -void kbase_job_hw_submit(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - int js) -{ - struct kbase_context *kctx; - u32 cfg; - u64 jc_head = katom->jc; - u64 affinity; - - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(katom); - - kctx = katom->kctx; - - /* Command register must be available */ - KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); - - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), - jc_head & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), - jc_head >> 32); - - affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); - - /* start MMU, medium priority, cache clean/flush on end, clean/flush on - * start */ - cfg = kctx->as_nr; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && - !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) - cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; - - if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) - cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; - else - cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; - - if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && - !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) - cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; - else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) - cfg |= JS_CONFIG_END_FLUSH_CLEAN; - else - cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649)) - cfg |= JS_CONFIG_START_MMU; - - cfg |= JS_CONFIG_THREAD_PRI(8); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && - (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) - cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; - - if (kbase_hw_has_feature(kbdev, - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { - cfg |= JS_CONFIG_JOB_CHAIN_FLAG; - katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; - kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = - true; - } else { - katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; - kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = - false; - } - } - - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), - katom->flush_id); - - /* Write an approximate start timestamp. - * It's approximate because there might be a job in the HEAD register. - */ - katom->start_timestamp = ktime_get(); - - /* GO ! 
*/ - dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", - katom, kctx, js, jc_head); - - KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, - (u32)affinity); - -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_job_slots_event( - GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), - kctx, kbase_jd_atom_id(kctx, katom)); -#endif - KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head, - affinity, cfg); - KBASE_TLSTREAM_TL_RET_CTX_LPU( - kctx, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); - KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); - KBASE_TLSTREAM_TL_RET_ATOM_LPU( - katom, - &kbdev->gpu_props.props.raw_props.js_features[js], - "ctx_nr,atom_nr"); -#ifdef CONFIG_GPU_TRACEPOINTS - if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { - /* If this is the only job on the slot, trace it as starting */ - char js_string[16]; - - trace_gpu_sched_switch( - kbasep_make_job_slot_string(js, js_string, - sizeof(js_string)), - ktime_to_ns(katom->start_timestamp), - (u32)katom->kctx->id, 0, katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; - } -#endif - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_START); -} - -/** - * kbasep_job_slot_update_head_start_timestamp - Update timestamp - * @kbdev: kbase device - * @js: job slot - * @end_timestamp: timestamp - * - * Update the start_timestamp of the job currently in the HEAD, based on the - * fact that we got an IRQ for the previous set of completed jobs. - * - * The estimate also takes into account the time the job was submitted, to - * work out the best estimate (which might still result in an over-estimate to - * the calculated time spent) - */ -static void kbasep_job_slot_update_head_start_timestamp( - struct kbase_device *kbdev, - int js, - ktime_t end_timestamp) -{ - if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) { - struct kbase_jd_atom *katom; - ktime_t timestamp_diff; - /* The atom in the HEAD */ - katom = kbase_gpu_inspect(kbdev, js, 0); - - KBASE_DEBUG_ASSERT(katom != NULL); - - timestamp_diff = ktime_sub(end_timestamp, - katom->start_timestamp); - if (ktime_to_ns(timestamp_diff) >= 0) { - /* Only update the timestamp if it's a better estimate - * than what's currently stored. This is because our - * estimate that accounts for the throttle time may be - * too much of an overestimate */ - katom->start_timestamp = end_timestamp; - } - } -} - -/** - * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline - * tracepoint - * @kbdev: kbase device - * @js: job slot - * - * Make a tracepoint call to the instrumentation module informing that - * softstop happened on given lpu (job slot). 
- */ -static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, - int js) -{ - KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( - &kbdev->gpu_props.props.raw_props.js_features[js]); -} - -void kbase_job_done(struct kbase_device *kbdev, u32 done) -{ - unsigned long flags; - int i; - u32 count = 0; - ktime_t end_timestamp = ktime_get(); - - KBASE_DEBUG_ASSERT(kbdev); - - KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - while (done) { - u32 failed = done >> 16; - - /* treat failed slots as finished slots */ - u32 finished = (done & 0xFFFF) | failed; - - /* Note: This is inherently unfair, as we always check - * for lower numbered interrupts before the higher - * numbered ones.*/ - i = ffs(finished) - 1; - KBASE_DEBUG_ASSERT(i >= 0); - - do { - int nr_done; - u32 active; - u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ - u64 job_tail = 0; - - if (failed & (1u << i)) { - /* read out the job slot status code if the job - * slot reported failure */ - completion_code = kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_STATUS)); - - if (completion_code == BASE_JD_EVENT_STOPPED) { -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_job_slots_event( - GATOR_MAKE_EVENT( - GATOR_JOB_SLOT_SOFT_STOPPED, i), - NULL, 0); -#endif - - kbasep_trace_tl_event_lpu_softstop( - kbdev, i); - - /* Soft-stopped job - read the value of - * JS_TAIL so that the job chain can - * be resumed */ - job_tail = (u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_LO)) | - ((u64)kbase_reg_read(kbdev, - JOB_SLOT_REG(i, JS_TAIL_HI)) - << 32); - } else if (completion_code == - BASE_JD_EVENT_NOT_STARTED) { - /* PRLAM-10673 can cause a TERMINATED - * job to come back as NOT_STARTED, but - * the error interrupt helps us detect - * it */ - completion_code = - BASE_JD_EVENT_TERMINATED; - } - - kbase_gpu_irq_evict(kbdev, i, completion_code); - } - - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), - done & ((1 << i) | (1 << (i + 16)))); - active = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); - - if (((active >> i) & 1) == 0 && - (((done >> (i + 16)) & 1) == 0)) { - /* There is a potential race we must work - * around: - * - * 1. A job slot has a job in both current and - * next registers - * 2. The job in current completes - * successfully, the IRQ handler reads - * RAWSTAT and calls this function with the - * relevant bit set in "done" - * 3. The job in the next registers becomes the - * current job on the GPU - * 4. Sometime before the JOB_IRQ_CLEAR line - * above the job on the GPU _fails_ - * 5. The IRQ_CLEAR clears the done bit but not - * the failed bit. This atomically sets - * JOB_IRQ_JS_STATE. However since both jobs - * have now completed the relevant bits for - * the slot are set to 0. - * - * If we now did nothing then we'd incorrectly - * assume that _both_ jobs had completed - * successfully (since we haven't yet observed - * the fail bit being set in RAWSTAT). - * - * So at this point if there are no active jobs - * left we check to see if RAWSTAT has a failure - * bit set for the job slot. If it does we know - * that there has been a new failure that we - * didn't previously know about, so we make sure - * that we record this in active (but we wait - * for the next loop to deal with it). - * - * If we were handling a job failure (i.e. 
done - * has the relevant high bit set) then we know - * that the value read back from - * JOB_IRQ_JS_STATE is the correct number of - * remaining jobs because the failed job will - * have prevented any futher jobs from starting - * execution. - */ - u32 rawstat = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); - - if ((rawstat >> (i + 16)) & 1) { - /* There is a failed job that we've - * missed - add it back to active */ - active |= (1u << i); - } - } - - dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", - completion_code); - - nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); - nr_done -= (active >> i) & 1; - nr_done -= (active >> (i + 16)) & 1; - - if (nr_done <= 0) { - dev_warn(kbdev->dev, "Spurious interrupt on slot %d", - i); - - goto spurious; - } - - count += nr_done; - - while (nr_done) { - if (nr_done == 1) { - kbase_gpu_complete_hw(kbdev, i, - completion_code, - job_tail, - &end_timestamp); - kbase_jm_try_kick_all(kbdev); - } else { - /* More than one job has completed. - * Since this is not the last job being - * reported this time it must have - * passed. This is because the hardware - * will not allow further jobs in a job - * slot to complete until the failed job - * is cleared from the IRQ status. - */ - kbase_gpu_complete_hw(kbdev, i, - BASE_JD_EVENT_DONE, - 0, - &end_timestamp); - } - nr_done--; - } - spurious: - done = kbase_reg_read(kbdev, - JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { - /* Workaround for missing interrupt caused by - * PRLAM-10883 */ - if (((active >> i) & 1) && (0 == - kbase_reg_read(kbdev, - JOB_SLOT_REG(i, - JS_STATUS)))) { - /* Force job slot to be processed again - */ - done |= (1u << i); - } - } - - failed = done >> 16; - finished = (done & 0xFFFF) | failed; - if (done) - end_timestamp = ktime_get(); - } while (finished & (1 << i)); - - kbasep_job_slot_update_head_start_timestamp(kbdev, i, - end_timestamp); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_COMMITTED) { - /* If we're trying to reset the GPU then we might be able to do - * it early (without waiting for a timeout) because some jobs - * have completed - */ - kbasep_try_reset_gpu_early(kbdev); - } - KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); -} -KBASE_EXPORT_TEST_API(kbase_job_done); - -static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - bool soft_stops_allowed = true; - - if (kbase_jd_katom_is_protected(katom)) { - soft_stops_allowed = false; - } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { - if ((katom->core_req & BASE_JD_REQ_T) != 0) - soft_stops_allowed = false; - } - return soft_stops_allowed; -} - -static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, - base_jd_core_req core_reqs) -{ - bool hard_stops_allowed = true; - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { - if ((core_reqs & BASE_JD_REQ_T) != 0) - hard_stops_allowed = false; - } - return hard_stops_allowed; -} - -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom) -{ -#if KBASE_TRACE_ENABLE - u32 status_reg_before; - u64 job_in_head_before; - u32 status_reg_after; - - KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); - - /* Check the head pointer */ - job_in_head_before = ((u64) kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_LO))) - | (((u64) 
kbase_reg_read(kbdev, - JOB_SLOT_REG(js, JS_HEAD_HI))) - << 32); - status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); -#endif - - if (action == JS_COMMAND_SOFT_STOP) { - bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, - target_katom); - - if (!soft_stop_allowed) { -#ifdef CONFIG_MALI_DEBUG - dev_dbg(kbdev->dev, - "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); -#endif /* CONFIG_MALI_DEBUG */ - return; - } - - /* We are about to issue a soft stop, so mark the atom as having - * been soft stopped */ - target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; - - /* Mark the point where we issue the soft-stop command */ - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { - int i; - - for (i = 0; - i < kbase_backend_nr_atoms_submitted(kbdev, js); - i++) { - struct kbase_jd_atom *katom; - - katom = kbase_gpu_inspect(kbdev, js, i); - - KBASE_DEBUG_ASSERT(katom); - - /* For HW_ISSUE_8316, only 'bad' jobs attacking - * the system can cause this issue: normally, - * all memory should be allocated in multiples - * of 4 pages, and growable memory should be - * changed size in multiples of 4 pages. - * - * Whilst such 'bad' jobs can be cleared by a - * GPU reset, the locking up of a uTLB entry - * caused by the bad job could also stall other - * ASs, meaning that other ASs' jobs don't - * complete in the 'grace' period before the - * reset. We don't want to lose other ASs' jobs - * when they would normally complete fine, so we - * must 'poke' the MMU regularly to help other - * ASs complete */ - kbase_as_poking_timer_retain_atom( - kbdev, katom->kctx, katom); - } - } - - if (kbase_hw_has_feature( - kbdev, - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; - } - } else if (action == JS_COMMAND_HARD_STOP) { - bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, - core_reqs); - - if (!hard_stop_allowed) { - /* Jobs can be hard-stopped for the following reasons: - * * CFS decides the job has been running too long (and - * soft-stop has not occurred). In this case the GPU - * will be reset by CFS if the job remains on the - * GPU. - * - * * The context is destroyed, kbase_jd_zap_context - * will attempt to hard-stop the job. However it also - * has a watchdog which will cause the GPU to be - * reset if the job remains on the GPU. - * - * * An (unhandled) MMU fault occurred. As long as - * BASE_HW_ISSUE_8245 is defined then the GPU will be - * reset. - * - * All three cases result in the GPU being reset if the - * hard-stop fails, so it is safe to just return and - * ignore the hard-stop request. - */ - dev_warn(kbdev->dev, - "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); - return; - } - target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; - - if (kbase_hw_has_feature( - kbdev, - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? 
- JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; - } - } - - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); - -#if KBASE_TRACE_ENABLE - status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); - if (status_reg_after == BASE_JD_EVENT_ACTIVE) { - struct kbase_jd_atom *head; - struct kbase_context *head_kctx; - - head = kbase_gpu_inspect(kbdev, js, 0); - head_kctx = head->kctx; - - if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, - head, job_in_head_before, js); - else - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, - 0, js); - - switch (action) { - case JS_COMMAND_SOFT_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, - head, head->jc, js); - break; - case JS_COMMAND_SOFT_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, - head, head->jc, js); - break; - case JS_COMMAND_SOFT_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, - head, head->jc, js); - break; - case JS_COMMAND_HARD_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, - head, head->jc, js); - break; - case JS_COMMAND_HARD_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, - head, head->jc, js); - break; - case JS_COMMAND_HARD_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, - head, head->jc, js); - break; - default: - BUG(); - break; - } - } else { - if (status_reg_before == BASE_JD_EVENT_ACTIVE) - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, - job_in_head_before, js); - else - KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, - 0, js); - - switch (action) { - case JS_COMMAND_SOFT_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, - js); - break; - case JS_COMMAND_SOFT_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, - 0, js); - break; - case JS_COMMAND_SOFT_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, - 0, js); - break; - case JS_COMMAND_HARD_STOP: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, - js); - break; - case JS_COMMAND_HARD_STOP_0: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, - 0, js); - break; - case JS_COMMAND_HARD_STOP_1: - KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, - 0, js); - break; - default: - BUG(); - break; - } - } -#endif -} - -void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) -{ - unsigned long flags; - struct kbase_device *kbdev; - int i; - - KBASE_DEBUG_ASSERT(kctx != NULL); - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); - - /* Cancel any remaining running jobs for this kctx */ - mutex_lock(&kctx->jctx.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Invalidate all jobs in context, to prevent re-submitting */ - for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { - if (!work_pending(&kctx->jctx.atoms[i].work)) - kctx->jctx.atoms[i].event_code = - BASE_JD_EVENT_JOB_CANCELLED; - } - - for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) - kbase_job_slot_hardstop(kctx, i, NULL); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->jctx.lock); -} - -void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, - struct kbase_jd_atom *target_katom) -{ - struct kbase_device *kbdev; - int js = target_katom->slot_nr; - int priority = target_katom->sched_priority; - int i; - bool stop_sent = false; - - KBASE_DEBUG_ASSERT(kctx != NULL); - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (i = 0; i < 
kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { - struct kbase_jd_atom *katom; - - katom = kbase_gpu_inspect(kbdev, js, i); - if (!katom) - continue; - - if ((kbdev->js_ctx_scheduling_mode == - KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) && - (katom->kctx != kctx)) - continue; - - if (katom->sched_priority > priority) { - if (!stop_sent) - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( - target_katom); - - kbase_job_slot_softstop(kbdev, js, katom); - stop_sent = true; - } - } -} - -void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); - - timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, - kctx->jctx.job_nr == 0, timeout); - - if (timeout != 0) - timeout = wait_event_timeout( - kctx->jctx.sched_info.ctx.is_scheduled_wait, - !kbase_ctx_flag(kctx, KCTX_SCHEDULED), - timeout); - - /* Neither wait timed out; all done! */ - if (timeout != 0) - goto exit; - - if (kbase_prepare_to_reset_gpu(kbdev)) { - dev_err(kbdev->dev, - "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", - ZAP_TIMEOUT); - kbase_reset_gpu(kbdev); - } - - /* Wait for the reset to complete */ - wait_event(kbdev->hwaccess.backend.reset_wait, - atomic_read(&kbdev->hwaccess.backend.reset_gpu) - == KBASE_RESET_GPU_NOT_PENDING); -exit: - dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); - - /* Ensure that the signallers of the waitqs have finished */ - mutex_lock(&kctx->jctx.lock); - mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - mutex_unlock(&kctx->jctx.lock); -} - -u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) -{ - u32 flush_id = 0; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { - mutex_lock(&kbdev->pm.lock); - if (kbdev->pm.backend.gpu_powered) - flush_id = kbase_reg_read(kbdev, - GPU_CONTROL_REG(LATEST_FLUSH)); - mutex_unlock(&kbdev->pm.lock); - } - - return flush_id; -} - -int kbase_job_slot_init(struct kbase_device *kbdev) -{ - kbdev->hwaccess.backend.reset_workq = alloc_workqueue( - "Mali reset workqueue", 0, 1); - if (NULL == kbdev->hwaccess.backend.reset_workq) - return -EINVAL; - - INIT_WORK(&kbdev->hwaccess.backend.reset_work, - kbasep_reset_timeout_worker); - - hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - kbdev->hwaccess.backend.reset_timer.function = - kbasep_reset_timer_callback; - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_job_slot_init); - -void kbase_job_slot_halt(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -void kbase_job_slot_term(struct kbase_device *kbdev) -{ - destroy_workqueue(kbdev->hwaccess.backend.reset_workq); -} -KBASE_EXPORT_TEST_API(kbase_job_slot_term); - -/** - * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot - * @kbdev: kbase device pointer - * @kctx: context to check against - * @js: slot to check - * @target_katom: An atom to check, or NULL if all atoms from @kctx on - * slot @js should be checked - * - * This checks are based upon parameters that would normally be passed to - * kbase_job_slot_hardstop(). 
- * - * In the event of @target_katom being NULL, this will check the last jobs that - * are likely to be running on the slot to see if a) they belong to kctx, and - * so would be stopped, and b) whether they have AFBC - * - * In that case, It's guaranteed that a job currently executing on the HW with - * AFBC will be detected. However, this is a conservative check because it also - * detects jobs that have just completed too. - * - * Return: true when hard-stop _might_ stop an afbc atom, else false. - */ -static bool kbasep_check_for_afbc_on_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom) -{ - bool ret = false; - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* When we have an atom the decision can be made straight away. */ - if (target_katom) - return !!(target_katom->core_req & BASE_JD_REQ_FS_AFBC); - - /* Otherwise, we must chweck the hardware to see if it has atoms from - * this context with AFBC. */ - for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { - struct kbase_jd_atom *katom; - - katom = kbase_gpu_inspect(kbdev, js, i); - if (!katom) - continue; - - /* Ignore atoms from other contexts, they won't be stopped when - * we use this for checking if we should hard-stop them */ - if (katom->kctx != kctx) - continue; - - /* An atom on this slot and this context: check for AFBC */ - if (katom->core_req & BASE_JD_REQ_FS_AFBC) { - ret = true; - break; - } - } - - return ret; -} - -/** - * kbase_job_slot_softstop_swflags - Soft-stop a job with flags - * @kbdev: The kbase device - * @js: The job slot to soft-stop - * @target_katom: The job that should be soft-stopped (or NULL for any job) - * @sw_flags: Flags to pass in about the soft-stop - * - * Context: - * The job slot lock must be held when calling this function. - * The job slot must not already be in the process of being soft-stopped. - * - * Soft-stop the specified job slot, with extra information about the stop - * - * Where possible any job in the next register is evicted before the soft-stop. - */ -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags) -{ - KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); - kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, - JS_COMMAND_SOFT_STOP | sw_flags); -} - -/** - * kbase_job_slot_softstop - Soft-stop the specified job slot - * @kbdev: The kbase device - * @js: The job slot to soft-stop - * @target_katom: The job that should be soft-stopped (or NULL for any job) - * Context: - * The job slot lock must be held when calling this function. - * The job slot must not already be in the process of being soft-stopped. - * - * Where possible any job in the next register is evicted before the soft-stop. - */ -void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom) -{ - kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); -} - -/** - * kbase_job_slot_hardstop - Hard-stop the specified job slot - * @kctx: The kbase context that contains the job(s) that should - * be hard-stopped - * @js: The job slot to hard-stop - * @target_katom: The job that should be hard-stopped (or NULL for all - * jobs from the context) - * Context: - * The job slot lock must be held when calling this function. 
- */ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom) -{ - struct kbase_device *kbdev = kctx->kbdev; - bool stopped; - /* We make the check for AFBC before evicting/stopping atoms. Note - * that no other thread can modify the slots whilst we have the - * hwaccess_lock. */ - int needs_workaround_for_afbc = - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542) - && kbasep_check_for_afbc_on_slot(kbdev, kctx, js, - target_katom); - - stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, - target_katom, - JS_COMMAND_HARD_STOP); - if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || - kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || - needs_workaround_for_afbc)) { - /* MIDBASE-2916 if a fragment job with AFBC encoding is - * hardstopped, ensure to do a soft reset also in order to - * clear the GPU status. - * Workaround for HW issue 8401 has an issue,so after - * hard-stopping just reset the GPU. This will ensure that the - * jobs leave the GPU.*/ - if (kbase_prepare_to_reset_gpu_locked(kbdev)) { - dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue"); - kbase_reset_gpu_locked(kbdev); - } - } -} - -/** - * kbase_job_check_enter_disjoint - potentiall enter disjoint mode - * @kbdev: kbase device - * @action: the event which has occurred - * @core_reqs: core requirements of the atom - * @target_katom: the atom which is being affected - * - * For a certain soft/hard-stop action, work out whether to enter disjoint - * state. - * - * This does not register multiple disjoint events if the atom has already - * started a disjoint period - * - * @core_reqs can be supplied as 0 if the atom had not started on the hardware - * (and so a 'real' soft/hard-stop was not required, but it still interrupted - * flow, perhaps on another context) - * - * kbase_job_check_leave_disjoint() should be used to end the disjoint - * state when the soft/hard-stop action is complete - */ -void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) -{ - u32 hw_action = action & JS_COMMAND_MASK; - - /* For hard-stop, don't enter if hard-stop not allowed */ - if (hw_action == JS_COMMAND_HARD_STOP && - !kbasep_hard_stop_allowed(kbdev, core_reqs)) - return; - - /* For soft-stop, don't enter if soft-stop not allowed, or isn't - * causing disjoint */ - if (hw_action == JS_COMMAND_SOFT_STOP && - !(kbasep_soft_stop_allowed(kbdev, target_katom) && - (action & JS_COMMAND_SW_CAUSES_DISJOINT))) - return; - - /* Nothing to do if already logged disjoint state on this atom */ - if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) - return; - - target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; - kbase_disjoint_state_up(kbdev); -} - -/** - * kbase_job_check_enter_disjoint - potentially leave disjoint state - * @kbdev: kbase device - * @target_katom: atom which is finishing - * - * Work out whether to leave disjoint state when finishing an atom that was - * originated by kbase_job_check_enter_disjoint(). 
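The enter/leave pairing documented above amounts to a per-atom flag guarding a global up/down count, so repeated stop requests against the same atom register only one disjoint event. A minimal standalone model of that pattern, using plain C types rather than the kbase ones:

#include <stdio.h>

#define FLAG_IN_DISJOINT 0x1

struct atom {
	unsigned int flags;
};

static int disjoint_count;

/* Enter disjoint state at most once per atom. */
static void check_enter_disjoint(struct atom *a)
{
	if (a->flags & FLAG_IN_DISJOINT)
		return;
	a->flags |= FLAG_IN_DISJOINT;
	disjoint_count++;
}

/* Leave disjoint state only if this atom previously entered it. */
static void check_leave_disjoint(struct atom *a)
{
	if (a->flags & FLAG_IN_DISJOINT) {
		a->flags &= ~FLAG_IN_DISJOINT;
		disjoint_count--;
	}
}

int main(void)
{
	struct atom a = { 0 };

	check_enter_disjoint(&a);
	check_enter_disjoint(&a);	/* second stop: no double count */
	check_leave_disjoint(&a);
	printf("disjoint_count = %d\n", disjoint_count);	/* 0 */
	return 0;
}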
- */ -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom) -{ - if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { - target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; - kbase_disjoint_state_down(kbdev); - } -} - -static void kbase_debug_dump_registers(struct kbase_device *kbdev) -{ - int i; - - kbase_io_history_dump(kbdev); - - dev_err(kbdev->dev, "Register state:"); - dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); - dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); - for (i = 0; i < 3; i++) { - dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), - i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); - } - dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); - dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), - kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); - dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); - dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); - dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", - kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), - kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); -} - -static void kbasep_reset_timeout_worker(struct work_struct *data) -{ - unsigned long flags; - struct kbase_device *kbdev; - ktime_t end_timestamp = ktime_get(); - struct kbasep_js_device_data *js_devdata; - bool silent = false; - u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - - KBASE_DEBUG_ASSERT(data); - - kbdev = container_of(data, struct kbase_device, - hwaccess.backend.reset_work); - - KBASE_DEBUG_ASSERT(kbdev); - js_devdata = &kbdev->js_data; - - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_SILENT) - silent = true; - - KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); - - /* Disable GPU hardware counters. - * This call will block until counters are disabled. - */ - kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - - /* Make sure the timer has completed - this cannot be done from - * interrupt context, so this cannot be done within - * kbasep_try_reset_gpu_early. */ - hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); - - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* This would re-activate the GPU. 
Since it's already idle, - * there's no need to reset it */ - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); - kbase_disjoint_state_down(kbdev); - wake_up(&kbdev->hwaccess.backend.reset_wait); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return; - } - - KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_lock(&kbdev->mmu_mask_change); - kbase_pm_reset_start_locked(kbdev); - - /* We're about to flush out the IRQs and their bottom half's */ - kbdev->irq_reset_flush = true; - - /* Disable IRQ to avoid IRQ handlers to kick in after releasing the - * spinlock; this also clears any outstanding interrupts */ - kbase_pm_disable_interrupts_nolock(kbdev); - - spin_unlock(&kbdev->mmu_mask_change); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Ensure that any IRQ handlers have finished - * Must be done without any locks IRQ handlers will take */ - kbase_synchronize_irqs(kbdev); - - /* Flush out any in-flight work items */ - kbase_flush_mmu_wqs(kbdev); - - /* The flush has completed so reset the active indicator */ - kbdev->irq_reset_flush = false; - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { - /* Ensure that L2 is not transitioning when we send the reset - * command */ - while (--max_loops && kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2)) - ; - - WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); - } - - mutex_lock(&kbdev->pm.lock); - /* We hold the pm lock, so there ought to be a current policy */ - KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); - - /* All slot have been soft-stopped and we've waited - * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we - * assume that anything that is still left on the GPU is stuck there and - * we'll kill it when we reset the GPU */ - - if (!silent) - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", - RESET_TIMEOUT); - - /* Output the state of some interesting registers to help in the - * debugging of GPU resets */ - if (!silent) - kbase_debug_dump_registers(kbdev); - - /* Complete any jobs that were still on the GPU */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->protected_mode = false; - kbase_backend_reset(kbdev, &end_timestamp); - kbase_pm_metrics_update(kbdev, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Reset the GPU */ - kbase_pm_init_hw(kbdev, 0); - - mutex_unlock(&kbdev->pm.lock); - - mutex_lock(&js_devdata->runpool_mutex); - - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_ctx_sched_restore_all_as(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - kbase_pm_enable_interrupts(kbdev); - - kbase_disjoint_state_down(kbdev); - - mutex_unlock(&js_devdata->runpool_mutex); - - mutex_lock(&kbdev->pm.lock); - - kbase_pm_reset_complete(kbdev); - - /* Find out what cores are required now */ - kbase_pm_update_cores_state(kbdev); - - /* Synchronously request and wait for those cores, because if - * instrumentation is enabled it would need them immediately. 
*/ - kbase_pm_wait_for_desired_state(kbdev); - - mutex_unlock(&kbdev->pm.lock); - - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); - - wake_up(&kbdev->hwaccess.backend.reset_wait); - if (!silent) - dev_err(kbdev->dev, "Reset complete"); - - /* Try submitting some jobs to restart processing */ - KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); - kbase_js_sched_all(kbdev); - - /* Process any pending slot updates */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_backend_slot_update(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - kbase_pm_context_idle(kbdev); - - /* Re-enable GPU hardware counters */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); -} - -static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) -{ - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - hwaccess.backend.reset_timer); - - KBASE_DEBUG_ASSERT(kbdev); - - /* Reset still pending? */ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == - KBASE_RESET_GPU_COMMITTED) - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); - - return HRTIMER_NORESTART; -} - -/* - * If all jobs are evicted from the GPU then we can reset the GPU - * immediately instead of waiting for the timeout to elapse - */ - -static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) -{ - int i; - int pending_jobs = 0; - - KBASE_DEBUG_ASSERT(kbdev); - - /* Count the number of jobs */ - for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) - pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); - - if (pending_jobs > 0) { - /* There are still jobs on the GPU - wait */ - return; - } - - /* To prevent getting incorrect registers when dumping failed job, - * skip early reset. - */ - if (kbdev->job_fault_debug != false) - return; - - /* Check that the reset has been committed to (i.e. kbase_reset_gpu has - * been called), and that no other thread beat this thread to starting - * the reset */ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != - KBASE_RESET_GPU_COMMITTED) { - /* Reset has already occurred */ - return; - } - - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); -} - -static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbasep_try_reset_gpu_early_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -/** - * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU - * @kbdev: kbase device - * - * This function just soft-stops all the slots to ensure that as many jobs as - * possible are saved. - * - * Return: - * The function returns a boolean which should be interpreted as follows: - * true - Prepared for reset, kbase_reset_gpu_locked should be called. - * false - Another thread is performing a reset, kbase_reset_gpu should - * not be called. 
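The prepare/commit flow described here is essentially a small state machine advanced by compare-and-swap: only one thread may move the state out of "not pending", and only a committed reset may be promoted to "happening". A rough standalone model of that handshake, using C11 atomics in place of the kernel's atomic_t and illustrative state names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum reset_state {
	RESET_NOT_PENDING,
	RESET_PREPARED,
	RESET_COMMITTED,
	RESET_HAPPENING,
};

static _Atomic int reset_state = RESET_NOT_PENDING;

/* Only one thread may move NOT_PENDING -> PREPARED; others back off. */
static bool prepare_to_reset(void)
{
	int expected = RESET_NOT_PENDING;

	return atomic_compare_exchange_strong(&reset_state, &expected,
					      RESET_PREPARED);
}

/* The preparing thread then commits the reset... */
static void commit_reset(void)
{
	atomic_store(&reset_state, RESET_COMMITTED);
}

/* ...and the worker or timer moves COMMITTED -> HAPPENING exactly once
 * before touching the hardware. */
static bool try_start_reset(void)
{
	int expected = RESET_COMMITTED;

	return atomic_compare_exchange_strong(&reset_state, &expected,
					      RESET_HAPPENING);
}

int main(void)
{
	if (prepare_to_reset()) {
		commit_reset();
		if (try_start_reset())
			printf("reset worker would run now\n");
		atomic_store(&reset_state, RESET_NOT_PENDING);
	}
	return 0;
}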
- */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) -{ - int i; - - KBASE_DEBUG_ASSERT(kbdev); - - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_PREPARED) != - KBASE_RESET_GPU_NOT_PENDING) { - /* Some other thread is already resetting the GPU */ - return false; - } - - kbase_disjoint_state_up(kbdev); - - for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) - kbase_job_slot_softstop(kbdev, i, NULL); - - return true; -} - -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) -{ - unsigned long flags; - bool ret; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} -KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); - -/* - * This function should be called after kbase_prepare_to_reset_gpu if it - * returns true. It should never be called without a corresponding call to - * kbase_prepare_to_reset_gpu. - * - * After this function is called (or not called if kbase_prepare_to_reset_gpu - * returned false), the caller should wait for - * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset - * has completed. - */ -void kbase_reset_gpu(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev); - - /* Note this is an assert/atomic_set because it is a software issue for - * a race to be occuring here */ - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_PREPARED); - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED); - - dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", - kbdev->reset_timeout_ms); - - hrtimer_start(&kbdev->hwaccess.backend.reset_timer, - HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), - HRTIMER_MODE_REL); - - /* Try resetting early */ - kbasep_try_reset_gpu_early(kbdev); -} -KBASE_EXPORT_TEST_API(kbase_reset_gpu); - -void kbase_reset_gpu_locked(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev); - - /* Note this is an assert/atomic_set because it is a software issue for - * a race to be occuring here */ - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_PREPARED); - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_COMMITTED); - - dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n", - kbdev->reset_timeout_ms); - hrtimer_start(&kbdev->hwaccess.backend.reset_timer, - HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), - HRTIMER_MODE_REL); - - /* Try resetting early */ - kbasep_try_reset_gpu_early_locked(kbdev); -} - -int kbase_reset_gpu_silent(struct kbase_device *kbdev) -{ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_SILENT) != - KBASE_RESET_GPU_NOT_PENDING) { - /* Some other thread is already resetting the GPU */ - return -EAGAIN; - } - - kbase_disjoint_state_up(kbdev); - - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); - - return 0; -} - -bool kbase_reset_gpu_active(struct kbase_device *kbdev) -{ - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_NOT_PENDING) - return false; - - return true; -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_internal.h deleted file mode 100755 index 
452ddee35581..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_internal.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Job Manager backend-specific low-level APIs. - */ - -#ifndef _KBASE_JM_HWACCESS_H_ -#define _KBASE_JM_HWACCESS_H_ - -#include -#include -#include - -#include - -/** - * kbase_job_submit_nolock() - Submit a job to a certain job-slot - * @kbdev: Device pointer - * @katom: Atom to submit - * @js: Job slot to submit on - * - * The caller must check kbasep_jm_is_submit_slots_free() != false before - * calling this. - * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbase_job_submit_nolock(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, int js); - -/** - * kbase_job_done_slot() - Complete the head job on a particular job-slot - * @kbdev: Device pointer - * @s: Job slot - * @completion_code: Completion code of job reported by GPU - * @job_tail: Job tail address reported by GPU - * @end_timestamp: Timestamp of job completion - */ -void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, - u64 job_tail, ktime_t *end_timestamp); - -#ifdef CONFIG_GPU_TRACEPOINTS -static inline char *kbasep_make_job_slot_string(int js, char *js_string, - size_t js_size) -{ - snprintf(js_string, js_size, "job_slot_%i", js); - return js_string; -} -#endif - -/** - * kbase_job_hw_submit() - Submit a job to the GPU - * @kbdev: Device pointer - * @katom: Atom to submit - * @js: Job slot to submit on - * - * The caller must check kbasep_jm_is_submit_slots_free() != false before - * calling this. - * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbase_job_hw_submit(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - int js); - -/** - * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop - * on the specified atom - * @kbdev: Device pointer - * @js: Job slot to stop on - * @action: The action to perform, either JSn_COMMAND_HARD_STOP or - * JSn_COMMAND_SOFT_STOP - * @core_reqs: Core requirements of atom to stop - * @target_katom: Atom to stop - * - * The following locking conditions are made on the caller: - * - it must hold the hwaccess_lock - */ -void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, - int js, - u32 action, - base_jd_core_req core_reqs, - struct kbase_jd_atom *target_katom); - -/** - * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job - * slot belonging to a given context. - * @kbdev: Device pointer - * @kctx: Context pointer. May be NULL - * @katom: Specific atom to stop. 
May be NULL - * @js: Job slot to hard stop - * @action: The action to perform, either JSn_COMMAND_HARD_STOP or - * JSn_COMMAND_SOFT_STOP - * - * If no context is provided then all jobs on the slot will be soft or hard - * stopped. - * - * If a katom is provided then only that specific atom will be stopped. In this - * case the kctx parameter is ignored. - * - * Jobs that are on the slot but are not yet on the GPU will be unpulled and - * returned to the job scheduler. - * - * Return: true if an atom was stopped, false otherwise - */ -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action); - -/** - * kbase_job_slot_init - Initialise job slot framework - * @kbdev: Device pointer - * - * Called on driver initialisation - * - * Return: 0 on success - */ -int kbase_job_slot_init(struct kbase_device *kbdev); - -/** - * kbase_job_slot_halt - Halt the job slot framework - * @kbdev: Device pointer - * - * Should prevent any further job slot processing - */ -void kbase_job_slot_halt(struct kbase_device *kbdev); - -/** - * kbase_job_slot_term - Terminate job slot framework - * @kbdev: Device pointer - * - * Called on driver termination - */ -void kbase_job_slot_term(struct kbase_device *kbdev); - -/** - * kbase_gpu_cache_clean - Cause a GPU cache clean & flush - * @kbdev: Device pointer - * - * Caller must not be in IRQ context - */ -void kbase_gpu_cache_clean(struct kbase_device *kbdev); - -#endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_rb.c deleted file mode 100755 index c714582dfd79..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_rb.c +++ /dev/null @@ -1,1722 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend specific APIs - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Return whether the specified ringbuffer is empty. HW access lock must be - * held */ -#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) -/* Return number of atoms currently in the specified ringbuffer. 
HW access lock - * must be held */ -#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) - -static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - ktime_t *end_timestamp); - -/** - * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer - * @kbdev: Device pointer - * @katom: Atom to enqueue - * - * Context: Caller must hold the HW access lock - */ -static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; - - WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; - rb->write_idx++; - - katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; -} - -/** - * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once - * it has been completed - * @kbdev: Device pointer - * @js: Job slot to remove atom from - * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in - * which case current time will be used. - * - * Context: Caller must hold the HW access lock - * - * Return: Atom removed from ringbuffer - */ -static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, - int js, - ktime_t *end_timestamp) -{ - struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; - struct kbase_jd_atom *katom; - - if (SLOT_RB_EMPTY(rb)) { - WARN(1, "GPU ringbuffer unexpectedly empty\n"); - return NULL; - } - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; - - kbase_gpu_release_atom(kbdev, katom, end_timestamp); - - rb->read_idx++; - - katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; - - return katom; -} - -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx) -{ - struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if ((SLOT_RB_ENTRIES(rb) - 1) < idx) - return NULL; /* idx out of range */ - - return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; -} - -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js) -{ - struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; - - if (SLOT_RB_EMPTY(rb)) - return NULL; - - return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; -} - -/** - * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently - * on the GPU - * @kbdev: Device pointer - * @js: Job slot to inspect - * - * Return: true if there are atoms on the GPU for slot js, - * false otherwise - */ -static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) -{ - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - - if (!katom) - return false; - if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED || - katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY) - return true; - } - - return false; -} - -/** - * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms - * currently on the GPU - * @kbdev: Device pointer - * - * Return: true if there are any atoms on the GPU, false otherwise - */ -static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) -{ - int js; - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (i = 0; i < SLOT_RB_SIZE; i++) { - 
struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - - if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) - return true; - } - } - return false; -} - -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) -{ - int nr = 0; - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - - if (katom && (katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED)) - nr++; - } - - return nr; -} - -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) -{ - int nr = 0; - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (i = 0; i < SLOT_RB_SIZE; i++) { - if (kbase_gpu_inspect(kbdev, js, i)) - nr++; - } - - return nr; -} - -static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, - enum kbase_atom_gpu_rb_state min_rb_state) -{ - int nr = 0; - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); - - if (katom && (katom->gpu_rb_state >= min_rb_state)) - nr++; - } - - return nr; -} - -/** - * check_secure_atom - Check if the given atom is in the given secure state and - * has a ringbuffer state of at least - * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @katom: Atom pointer - * @secure: Desired secure state - * - * Return: true if atom is in the given state, false otherwise - */ -static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) -{ - if (katom->gpu_rb_state >= - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && - ((kbase_jd_katom_is_protected(katom) && secure) || - (!kbase_jd_katom_is_protected(katom) && !secure))) - return true; - - return false; -} - -/** - * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given - * secure state in the ringbuffers of at least - * state - * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE - * @kbdev: Device pointer - * @secure: Desired secure state - * - * Return: true if any atoms are in the given state, false otherwise - */ -static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, - bool secure) -{ - int js, i; - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (i = 0; i < SLOT_RB_SIZE; i++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, i); - - if (katom) { - if (check_secure_atom(katom, secure)) - return true; - } - } - } - - return false; -} - -int kbase_backend_slot_free(struct kbase_device *kbdev, int js) -{ - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != - KBASE_RESET_GPU_NOT_PENDING) { - /* The GPU is being reset - so prevent submission */ - return 0; - } - - return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); -} - - -static void kbase_gpu_release_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - ktime_t *end_timestamp) -{ - struct kbase_context *kctx = katom->kctx; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - switch (katom->gpu_rb_state) { - case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: - /* Should be impossible */ - WARN(1, "Attempting to release atom not in ringbuffer\n"); - break; - - case KBASE_ATOM_GPU_RB_SUBMITTED: - /* Inform power management at start/finish of atom so it can - * update its GPU utilisation metrics. Mark atom as not - * submitted beforehand. 
*/ - katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - kbase_pm_metrics_update(kbdev, end_timestamp); - - if (katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom, - &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx, - &kbdev->gpu_props.props.raw_props.js_features - [katom->slot_nr]); - - case KBASE_ATOM_GPU_RB_READY: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: - break; - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: - if (kbase_jd_katom_is_protected(katom) && - (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK) && - (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_HWCNT)) - kbase_pm_protected_override_disable(kbdev); - if (!kbase_jd_katom_is_protected(katom) && - (katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) && - (katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) - kbase_pm_protected_override_disable(kbdev); - - if (katom->protected_state.enter != - KBASE_ATOM_ENTER_PROTECTED_CHECK || - katom->protected_state.exit != - KBASE_ATOM_EXIT_PROTECTED_CHECK) - kbdev->protected_mode_transition = false; - /* If the atom has suspended hwcnt but has not yet entered - * protected mode, then resume hwcnt now. If the GPU is now in - * protected mode then hwcnt will be resumed by GPU reset so - * don't resume it here. - */ - if (kbase_jd_katom_is_protected(katom) && - ((katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || - (katom->protected_state.enter == - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - } - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { - if (katom->atom_flags & - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { - kbase_pm_protected_l2_override(kbdev, false); - katom->atom_flags &= - ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; - } - } - - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - - case KBASE_ATOM_GPU_RB_RETURN_TO_JS: - break; - } - - katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; - katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; -} - -static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbase_gpu_release_atom(kbdev, katom, NULL); - katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; -} - -static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - bool slot_busy[3]; - - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) - return true; - slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0, - KBASE_ATOM_GPU_RB_WAITING_AFFINITY); - slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1, - 
KBASE_ATOM_GPU_RB_WAITING_AFFINITY); - slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2, - KBASE_ATOM_GPU_RB_WAITING_AFFINITY); - - if ((js == 2 && !(slot_busy[0] || slot_busy[1])) || - (js != 2 && !slot_busy[2])) - return true; - - /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */ - if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) || - kbase_gpu_atoms_submitted(kbdev, 1) || - backend->rmu_workaround_flag)) - return false; - - /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */ - if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) || - !backend->rmu_workaround_flag)) - return false; - - backend->rmu_workaround_flag = !backend->rmu_workaround_flag; - - return true; -} - -/** - * other_slots_busy - Determine if any job slots other than @js are currently - * running atoms - * @kbdev: Device pointer - * @js: Job slot - * - * Return: true if any slots other than @js are busy, false otherwise - */ -static inline bool other_slots_busy(struct kbase_device *kbdev, int js) -{ - int slot; - - for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { - if (slot == js) - continue; - - if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, - KBASE_ATOM_GPU_RB_SUBMITTED)) - return true; - } - - return false; -} - -static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) -{ - return kbdev->protected_mode; -} - -static void kbase_gpu_disable_coherent(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* - * When entering into protected mode, we must ensure that the - * GPU is not operating in coherent mode as well. This is to - * ensure that no protected memory can be leaked. - */ - if (kbdev->system_coherency == COHERENCY_ACE) - kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); -} - -static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) -{ - int err = -EINVAL; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - WARN_ONCE(!kbdev->protected_ops, - "Cannot enter protected mode: protected callbacks not specified.\n"); - - if (kbdev->protected_ops) { - /* Switch GPU to protected mode */ - err = kbdev->protected_ops->protected_mode_enable( - kbdev->protected_dev); - - if (err) { - dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", - err); - } else { - kbdev->protected_mode = true; - kbase_ipa_protection_mode_switch_event(kbdev); - } - } - - return err; -} - -static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - WARN_ONCE(!kbdev->protected_ops, - "Cannot exit protected mode: protected callbacks not specified.\n"); - - if (!kbdev->protected_ops) - return -EINVAL; - - /* The protected mode disable callback will be called as part of reset - */ - return kbase_reset_gpu_silent(kbdev); -} - -static int kbase_jm_protected_entry(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) -{ - int err = 0; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - err = kbase_gpu_protected_mode_enter(kbdev); - - /* - * Regardless of result before this call, we are no longer - * transitioning the GPU. - */ - - kbdev->protected_mode_transition = false; - kbase_pm_protected_override_disable(kbdev); - kbase_pm_update_cores_state_nolock(kbdev); - - KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev); - if (err) { - /* - * Failed to switch into protected mode, resume - * GPU hwcnt and fail atom. 
- */ - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* - * Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order. - */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - - return -EINVAL; - } - - /* - * Protected mode sanity checks. - */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == - kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), - kbase_gpu_in_protected_mode(kbdev)); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; - - return err; -} - -static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) -{ - int err = 0; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - switch (katom[idx]->protected_state.enter) { - case KBASE_ATOM_ENTER_PROTECTED_CHECK: - KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev); - /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV - * should ensure that we are not already transitiong, and that - * there are no atoms currently on the GPU. */ - WARN_ON(kbdev->protected_mode_transition); - WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); - /* If hwcnt is disabled, it means we didn't clean up correctly - * during last exit from protected mode. - */ - WARN_ON(kbdev->protected_mode_hwcnt_disabled); - - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_HWCNT; - - kbdev->protected_mode_transition = true; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_HWCNT: - /* See if we can get away with disabling hwcnt atomically */ - kbdev->protected_mode_hwcnt_desired = false; - if (!kbdev->protected_mode_hwcnt_disabled) { - if (kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)) - kbdev->protected_mode_hwcnt_disabled = true; - } - - /* We couldn't disable atomically, so kick off a worker */ - if (!kbdev->protected_mode_hwcnt_disabled) { -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, - &kbdev->protected_mode_hwcnt_disable_work); -#else - queue_work(system_highpri_wq, - &kbdev->protected_mode_hwcnt_disable_work); -#endif - return -EAGAIN; - } - - /* Once reaching this point GPU must be - * switched to protected mode or hwcnt - * re-enabled. */ - - /* - * Not in correct mode, begin protected mode switch. - * Entering protected mode requires us to power down the L2, - * and drop out of fully coherent mode. - */ - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; - - kbase_pm_protected_override_enable(kbdev); - kbase_pm_update_cores_state_nolock(kbdev); - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: - /* Avoid unnecessary waiting on non-ACE platforms. */ - if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) { - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { - /* - * The L2 is still powered, wait for all the users to - * finish with it before doing the actual reset. 
- */ - return -EAGAIN; - } - } - - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: - /* - * When entering into protected mode, we must ensure that the - * GPU is not operating in coherent mode as well. This is to - * ensure that no protected memory can be leaked. - */ - kbase_gpu_disable_coherent(kbdev); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { - /* - * Power on L2 caches; this will also result in the - * correct value written to coherency enable register. - */ - kbase_pm_protected_l2_override(kbdev, true); - - /* - * Set the flag on the atom that additional - * L2 references are taken. - */ - katom[idx]->atom_flags |= - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; - } - - katom[idx]->protected_state.enter = - KBASE_ATOM_ENTER_PROTECTED_FINISHED; - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) - return -EAGAIN; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_ENTER_PROTECTED_FINISHED: - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { - /* - * Check that L2 caches are powered and, if so, - * enter protected mode. - */ - if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { - /* - * Remove additional L2 reference and reset - * the atom flag which denotes it. - */ - if (katom[idx]->atom_flags & - KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { - kbase_pm_protected_l2_override(kbdev, - false); - katom[idx]->atom_flags &= - ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; - } - - err = kbase_jm_protected_entry(kbdev, katom, idx, js); - - if (err) - return err; - } else { - /* - * still waiting for L2 caches to power up - */ - return -EAGAIN; - } - } else { - err = kbase_jm_protected_entry(kbdev, katom, idx, js); - - if (err) - return err; - } - } - - return 0; -} - -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) -{ - int err = 0; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - switch (katom[idx]->protected_state.exit) { - case KBASE_ATOM_EXIT_PROTECTED_CHECK: - KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev); - /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV - * should ensure that we are not already transitiong, and that - * there are no atoms currently on the GPU. */ - WARN_ON(kbdev->protected_mode_transition); - WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); - - /* - * Exiting protected mode requires a reset, but first the L2 - * needs to be powered down to ensure it's not active when the - * reset is issued. - */ - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; - - kbdev->protected_mode_transition = true; - kbase_pm_protected_override_enable(kbdev); - kbase_pm_update_cores_state_nolock(kbdev); - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: - if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { - /* - * The L2 is still powered, wait for all the users to - * finish with it before doing the actual reset. 
- */ - return -EAGAIN; - } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_RESET; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_EXIT_PROTECTED_RESET: - /* Issue the reset to the GPU */ - err = kbase_gpu_protected_mode_reset(kbdev); - - if (err == -EAGAIN) - return -EAGAIN; - - if (err) { - kbdev->protected_mode_transition = false; - kbase_pm_protected_override_disable(kbdev); - - /* Failed to exit protected mode, fail atom */ - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - - /* If we're exiting from protected mode, hwcnt must have - * been disabled during entry. - */ - WARN_ON(!kbdev->protected_mode_hwcnt_disabled); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - - return -EINVAL; - } - - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: - /* A GPU reset is issued when exiting protected mode. Once the - * reset is done all atoms' state will also be reset. For this - * reason, if the atom is still in this state we can safely - * say that the reset has not completed i.e., we have not - * finished exiting protected mode yet. - */ - return -EAGAIN; - } - - return 0; -} - -void kbase_backend_slot_update(struct kbase_device *kbdev) -{ - int js; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbase_reset_gpu_active(kbdev)) - return; - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - struct kbase_jd_atom *katom[2]; - int idx; - - katom[0] = kbase_gpu_inspect(kbdev, js, 0); - katom[1] = kbase_gpu_inspect(kbdev, js, 1); - WARN_ON(katom[1] && !katom[0]); - - for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - bool cores_ready; - int ret; - - if (!katom[idx]) - continue; - - switch (katom[idx]->gpu_rb_state) { - case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: - /* Should be impossible */ - WARN(1, "Attempting to update atom not in ringbuffer\n"); - break; - - case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: - if (katom[idx]->atom_flags & - KBASE_KATOM_FLAG_X_DEP_BLOCKED) - break; - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: - if (kbase_gpu_check_secure_atoms(kbdev, - !kbase_jd_katom_is_protected( - katom[idx]))) - break; - - if ((idx == 1) && (kbase_jd_katom_is_protected( - katom[0]) != - kbase_jd_katom_is_protected( - katom[1]))) - break; - - if (kbdev->protected_mode_transition) - break; - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: - - /* - * Exiting protected mode must be done before - * the references on the cores are taken as - * a power down the L2 is required which - * can't happen after the references for this - * atom are taken. 
- */ - - if (!kbase_gpu_in_protected_mode(kbdev) && - kbase_jd_katom_is_protected(katom[idx])) { - /* Atom needs to transition into protected mode. */ - ret = kbase_jm_enter_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; - } else if (kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx])) { - /* Atom needs to transition out of protected mode. */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; - } - katom[idx]->protected_state.exit = - KBASE_ATOM_EXIT_PROTECTED_CHECK; - - /* Atom needs no protected mode transition. */ - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: - if (katom[idx]->will_fail_event_code) { - kbase_gpu_mark_atom_for_return(kbdev, - katom[idx]); - /* Set EVENT_DONE so this atom will be - completed, not unpulled. */ - katom[idx]->event_code = - BASE_JD_EVENT_DONE; - /* Only return if head atom or previous - * atom already removed - as atoms must - * be returned in order. */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - break; - } - - cores_ready = kbase_pm_cores_requested(kbdev, - true); - - if (katom[idx]->event_code == - BASE_JD_EVENT_PM_EVENT) { - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_RETURN_TO_JS; - break; - } - - if (!cores_ready) - break; - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_AFFINITY; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: - if (!kbase_gpu_rmu_workaround(kbdev, js)) - break; - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_READY; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_READY: - - if (idx == 1) { - /* Only submit if head atom or previous - * atom already submitted */ - if ((katom[0]->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - break; - - /* If intra-slot serialization in use - * then don't submit atom to NEXT slot - */ - if (kbdev->serialize_jobs & - KBASE_SERIALIZE_INTRA_SLOT) - break; - } - - /* If inter-slot serialization in use then don't - * submit atom if any other slots are in use */ - if ((kbdev->serialize_jobs & - KBASE_SERIALIZE_INTER_SLOT) && - other_slots_busy(kbdev, js)) - break; - - if ((kbdev->serialize_jobs & - KBASE_SERIALIZE_RESET) && - kbase_reset_gpu_active(kbdev)) - break; - - /* Check if this job needs the cycle counter - * enabled before submission */ - if (katom[idx]->core_req & BASE_JD_REQ_PERMON) - kbase_pm_request_gpu_cycle_counter_l2_is_on( - kbdev); - - kbase_job_hw_submit(kbdev, katom[idx], js); - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_SUBMITTED; - - /* Inform power management at start/finish of - * atom so it can update its GPU utilisation - * metrics. 
*/ - kbase_pm_metrics_update(kbdev, - &katom[idx]->start_timestamp); - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_SUBMITTED: - /* Atom submitted to HW, nothing else to do */ - break; - - case KBASE_ATOM_GPU_RB_RETURN_TO_JS: - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, - katom[idx]); - } - break; - } - } - } - - /* Warn if PRLAM-8987 affinity restrictions are violated */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) - WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) || - kbase_gpu_atoms_submitted(kbdev, 1)) && - kbase_gpu_atoms_submitted(kbdev, 2)); -} - - -void kbase_backend_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - kbase_gpu_enqueue_atom(kbdev, katom); - kbase_backend_slot_update(kbdev); -} - -#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ - (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) - -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code) -{ - struct kbase_jd_atom *katom; - struct kbase_jd_atom *next_katom; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom = kbase_gpu_inspect(kbdev, js, 0); - next_katom = kbase_gpu_inspect(kbdev, js, 1); - - if (next_katom && katom->kctx == next_katom->kctx && - next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && - (HAS_DEP(next_katom) || next_katom->sched_priority == - katom->sched_priority) && - (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) - != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) - != 0)) { - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), - JS_COMMAND_NOP); - next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - - if (completion_code == BASE_JD_EVENT_STOPPED) { - KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom, - &kbdev->gpu_props.props.raw_props.js_features - [next_katom->slot_nr]); - KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as - [next_katom->kctx->as_nr]); - KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx, - &kbdev->gpu_props.props.raw_props.js_features - [next_katom->slot_nr]); - } - - if (next_katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); - - return true; - } - - return false; -} - -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp) -{ - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - struct kbase_context *kctx = katom->kctx; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* - * When a hard-stop is followed close after a soft-stop, the completion - * code may be set to STOPPED, even though the job is terminated - */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { - if (completion_code == BASE_JD_EVENT_STOPPED && - (katom->atom_flags & - KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { - completion_code = BASE_JD_EVENT_TERMINATED; - } - } - - if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) || (katom->core_req & - BASE_JD_REQ_SKIP_CACHE_END)) && - completion_code != BASE_JD_EVENT_DONE && - !(completion_code & BASE_JD_SW_EVENT)) { - /* When a job chain fails, on a T60x or when - * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not - * flushed. 
To prevent future evictions causing possible memory - * corruption we need to flush the cache manually before any - * affected memory gets reused. */ - katom->need_cache_flush_cores_retained = true; - } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { - if (kbdev->gpu_props.num_core_groups > 1 && - katom->device_nr >= 1) { - dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); - katom->need_cache_flush_cores_retained = true; - } - } - - katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); - - if (completion_code == BASE_JD_EVENT_STOPPED) { - struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, - 0); - - /* - * Dequeue next atom from ringbuffers on same slot if required. - * This atom will already have been removed from the NEXT - * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that - * the atoms on this slot are returned in the correct order. - */ - if (next_katom && katom->kctx == next_katom->kctx && - next_katom->sched_priority == - katom->sched_priority) { - WARN_ON(next_katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED); - kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); - kbase_jm_return_atom_to_js(kbdev, next_katom); - } - } else if (completion_code != BASE_JD_EVENT_DONE) { - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int i; - - if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) - dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", - js, completion_code, - kbase_exception_name - (kbdev, - completion_code)); - -#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 - KBASE_TRACE_DUMP(kbdev); -#endif - kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); - - /* - * Remove all atoms on the same context from ringbuffers. This - * will not remove atoms that are already on the GPU, as these - * are guaranteed not to have fail dependencies on the failed - * atom. 
- */ - for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { - struct kbase_jd_atom *katom_idx0 = - kbase_gpu_inspect(kbdev, i, 0); - struct kbase_jd_atom *katom_idx1 = - kbase_gpu_inspect(kbdev, i, 1); - - if (katom_idx0 && katom_idx0->kctx == katom->kctx && - HAS_DEP(katom_idx0) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { - /* Dequeue katom_idx0 from ringbuffer */ - kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); - - if (katom_idx1 && - katom_idx1->kctx == katom->kctx - && HAS_DEP(katom_idx1) && - katom_idx0->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { - /* Dequeue katom_idx1 from ringbuffer */ - kbase_gpu_dequeue_atom(kbdev, i, - end_timestamp); - - katom_idx1->event_code = - BASE_JD_EVENT_STOPPED; - kbase_jm_return_atom_to_js(kbdev, - katom_idx1); - } - katom_idx0->event_code = BASE_JD_EVENT_STOPPED; - kbase_jm_return_atom_to_js(kbdev, katom_idx0); - - } else if (katom_idx1 && - katom_idx1->kctx == katom->kctx && - HAS_DEP(katom_idx1) && - katom_idx1->gpu_rb_state != - KBASE_ATOM_GPU_RB_SUBMITTED) { - /* Can not dequeue this atom yet - will be - * dequeued when atom at idx0 completes */ - katom_idx1->event_code = BASE_JD_EVENT_STOPPED; - kbase_gpu_mark_atom_for_return(kbdev, - katom_idx1); - } - } - } - - KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, - js, completion_code); - - if (job_tail != 0 && job_tail != katom->jc) { - bool was_updated = (job_tail != katom->jc); - - /* Some of the job has been executed, so we update the job chain - * address to where we should resume from */ - katom->jc = job_tail; - if (was_updated) - KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, - katom, job_tail, js); - } - - /* Only update the event code for jobs that weren't cancelled */ - if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) - katom->event_code = (base_jd_event_code)completion_code; - - /* Complete the job, and start new ones - * - * Also defer remaining work onto the workqueue: - * - Re-queue Soft-stopped jobs - * - For any other jobs, queue the job back into the dependency system - * - Schedule out the parent context if necessary, and schedule a new - * one in. - */ -#ifdef CONFIG_GPU_TRACEPOINTS - { - /* The atom in the HEAD */ - struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, - 0); - - if (next_katom && next_katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { - char js_string[16]; - - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - ktime_to_ns(*end_timestamp), - (u32)next_katom->kctx->id, 0, - next_katom->work_id); - kbdev->hwaccess.backend.slot_rb[js].last_context = - next_katom->kctx; - } else { - char js_string[16]; - - trace_gpu_sched_switch(kbasep_make_job_slot_string(js, - js_string, - sizeof(js_string)), - ktime_to_ns(ktime_get()), 0, 0, - 0); - kbdev->hwaccess.backend.slot_rb[js].last_context = 0; - } - } -#endif - - if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) - kbase_reset_gpu_silent(kbdev); - - if (completion_code == BASE_JD_EVENT_STOPPED) - katom = kbase_jm_return_atom_to_js(kbdev, katom); - else - katom = kbase_jm_complete(kbdev, katom, end_timestamp); - - if (katom) { - /* Cross-slot dependency has now become runnable. Try to submit - * it. */ - - /* Check if there are lower priority jobs to soft stop */ - kbase_job_slot_ctx_priority_check_locked(kctx, katom); - - kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); - } - - /* Job completion may have unblocked other atoms. 
Try to update all job - * slots */ - kbase_backend_slot_update(kbdev); -} - -void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) -{ - int js; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* Reset should always take the GPU out of protected mode */ - WARN_ON(kbase_gpu_in_protected_mode(kbdev)); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - int atom_idx = 0; - int idx; - - for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, atom_idx); - bool keep_in_jm_rb = false; - - if (!katom) - break; - if (katom->protected_state.exit == - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { - /* protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_protected(katom) && js == 0) || - !kbase_jd_katom_is_protected(katom), - "Protected atom on JS%d not supported", js); - } - if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && - !kbase_ctx_flag(katom->kctx, KCTX_DYING)) - keep_in_jm_rb = true; - - kbase_gpu_release_atom(kbdev, katom, NULL); - - /* - * If the atom wasn't on HW when the reset was issued - * then leave it in the RB and next time we're kicked - * it will be processed again from the starting state. - */ - if (keep_in_jm_rb) { - katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; - /* As the atom was not removed, increment the - * index so that we read the correct atom in the - * next iteration. */ - atom_idx++; - continue; - } - - /* - * The atom was on the HW when the reset was issued - * all we can do is fail the atom. - */ - kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - kbase_jm_complete(kbdev, katom, end_timestamp); - } - } - - /* Re-enable GPU hardware counters if we're resetting from protected - * mode. 
- */ - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - - KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev); - } - - kbdev->protected_mode_transition = false; - kbase_pm_protected_override_disable(kbdev); -} - -static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, - int js, - struct kbase_jd_atom *katom, - u32 action) -{ - u32 hw_action = action & JS_COMMAND_MASK; - - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); - kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, - katom->core_req, katom); - katom->kctx->blocked_js[js][katom->sched_priority] = true; -} - -static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - u32 action, - bool disjoint) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_gpu_mark_atom_for_return(kbdev, katom); - katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; - - if (disjoint) - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, - katom); -} - -static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) -{ - if (katom->x_post_dep) { - struct kbase_jd_atom *dep_atom = katom->x_post_dep; - - if (dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && - dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_RETURN_TO_JS) - return dep_atom->slot_nr; - } - return -1; -} - -bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js, - struct kbase_jd_atom *katom, - u32 action) -{ - struct kbase_jd_atom *katom_idx0; - struct kbase_jd_atom *katom_idx1; - - bool katom_idx0_valid, katom_idx1_valid; - - bool ret = false; - - int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; - int prio_idx0 = 0, prio_idx1 = 0; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); - katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); - - if (katom_idx0) - prio_idx0 = katom_idx0->sched_priority; - if (katom_idx1) - prio_idx1 = katom_idx1->sched_priority; - - if (katom) { - katom_idx0_valid = (katom_idx0 == katom); - /* If idx0 is to be removed and idx1 is on the same context, - * then idx1 must also be removed otherwise the atoms might be - * returned out of order */ - if (katom_idx1) - katom_idx1_valid = (katom_idx1 == katom) || - (katom_idx0_valid && - (katom_idx0->kctx == - katom_idx1->kctx)); - else - katom_idx1_valid = false; - } else { - katom_idx0_valid = (katom_idx0 && - (!kctx || katom_idx0->kctx == kctx)); - katom_idx1_valid = (katom_idx1 && - (!kctx || katom_idx1->kctx == kctx) && - prio_idx0 == prio_idx1); - } - - if (katom_idx0_valid) - stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); - if (katom_idx1_valid) - stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); - - if (katom_idx0_valid) { - if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { - /* Simple case - just dequeue and return */ - kbase_gpu_dequeue_atom(kbdev, js, NULL); - if (katom_idx1_valid) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom_idx1->event_code = - BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_jm_return_atom_to_js(kbdev, katom_idx1); - katom_idx1->kctx->blocked_js[js][prio_idx1] = - true; - } - - katom_idx0->event_code = - BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_jm_return_atom_to_js(kbdev, katom_idx0); - katom_idx0->kctx->blocked_js[js][prio_idx0] = true; - } else { - /* katom_idx0 is 
on GPU */ - if (katom_idx1_valid && katom_idx1->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { - /* katom_idx0 and katom_idx1 are on GPU */ - - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT)) == 0) { - /* idx0 has already completed - stop - * idx1 if needed*/ - if (katom_idx1_valid) { - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); - ret = true; - } - } else { - /* idx1 is in NEXT registers - attempt - * to remove */ - kbase_reg_write(kbdev, - JOB_SLOT_REG(js, - JS_COMMAND_NEXT), - JS_COMMAND_NOP); - - if (kbase_reg_read(kbdev, - JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO)) - != 0 || - kbase_reg_read(kbdev, - JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI)) - != 0) { - /* idx1 removed successfully, - * will be handled in IRQ */ - kbase_gpu_remove_atom(kbdev, - katom_idx1, - action, true); - stop_x_dep_idx1 = - should_stop_x_dep_slot(katom_idx1); - - /* stop idx0 if still on GPU */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx0, - action); - ret = true; - } else if (katom_idx1_valid) { - /* idx0 has already completed, - * stop idx1 if needed */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); - ret = true; - } - } - } else if (katom_idx1_valid) { - /* idx1 not on GPU but must be dequeued*/ - - /* idx1 will be handled in IRQ */ - kbase_gpu_remove_atom(kbdev, katom_idx1, action, - false); - /* stop idx0 */ - /* This will be repeated for anything removed - * from the next registers, since their normal - * flow was also interrupted, and this function - * might not enter disjoint state e.g. if we - * don't actually do a hard stop on the head - * atom */ - kbase_gpu_stop_atom(kbdev, js, katom_idx0, - action); - ret = true; - } else { - /* no atom in idx1 */ - /* just stop idx0 */ - kbase_gpu_stop_atom(kbdev, js, katom_idx0, - action); - ret = true; - } - } - } else if (katom_idx1_valid) { - if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { - /* Mark for return */ - /* idx1 will be returned once idx0 completes */ - kbase_gpu_remove_atom(kbdev, katom_idx1, action, - false); - } else { - /* idx1 is on GPU */ - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT)) == 0) { - /* idx0 has already completed - stop idx1 */ - kbase_gpu_stop_atom(kbdev, js, katom_idx1, - action); - ret = true; - } else { - /* idx1 is in NEXT registers - attempt to - * remove */ - kbase_reg_write(kbdev, JOB_SLOT_REG(js, - JS_COMMAND_NEXT), - JS_COMMAND_NOP); - - if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_LO)) != 0 || - kbase_reg_read(kbdev, JOB_SLOT_REG(js, - JS_HEAD_NEXT_HI)) != 0) { - /* idx1 removed successfully, will be - * handled in IRQ once idx0 completes */ - kbase_gpu_remove_atom(kbdev, katom_idx1, - action, - false); - } else { - /* idx0 has already completed - stop - * idx1 */ - kbase_gpu_stop_atom(kbdev, js, - katom_idx1, - action); - ret = true; - } - } - } - } - - - if (stop_x_dep_idx0 != -1) - kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, - NULL, action); - - if (stop_x_dep_idx1 != -1) - kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, - NULL, action); - - return ret; -} - -void kbase_backend_cache_clean(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - if (katom->need_cache_flush_cores_retained) { - kbase_gpu_start_cache_clean(kbdev); - kbase_gpu_wait_cache_clean(kbdev); - - katom->need_cache_flush_cores_retained = false; - } -} - -void kbase_backend_complete_wq(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - /* - * If cache flush required due to HW workaround then perform the flush - * now - */ 
- kbase_backend_cache_clean(kbdev, katom); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && - (katom->core_req & BASE_JD_REQ_FS) && - katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT && - (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) && - !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) { - dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n"); - if (kbasep_10969_workaround_clamp_coordinates(katom)) { - /* The job had a TILE_RANGE_FAULT after was soft-stopped - * Due to an HW issue we try to execute the job again. - */ - dev_dbg(kbdev->dev, - "Clamping has been executed, try to rerun the job\n" - ); - katom->event_code = BASE_JD_EVENT_STOPPED; - katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; - } - } -} - -void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req) -{ - if (!kbdev->pm.active_count) { - mutex_lock(&kbdev->js_data.runpool_mutex); - mutex_lock(&kbdev->pm.lock); - kbase_pm_update_active(kbdev); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&kbdev->js_data.runpool_mutex); - } -} - -void kbase_gpu_dump_slots(struct kbase_device *kbdev) -{ - unsigned long flags; - int js; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - int idx; - - for (idx = 0; idx < SLOT_RB_SIZE; idx++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, - js, - idx); - - if (katom) - dev_info(kbdev->dev, - " js%d idx%d : katom=%p gpu_rb_state=%d\n", - js, idx, katom, katom->gpu_rb_state); - else - dev_info(kbdev->dev, " js%d idx%d : empty\n", - js, idx); - } - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_rb.h deleted file mode 100755 index c3b9f2d85536..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_jm_rb.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend specific APIs - */ - -#ifndef _KBASE_HWACCESS_GPU_H_ -#define _KBASE_HWACCESS_GPU_H_ - -#include - -/** - * kbase_gpu_irq_evict - Evict an atom from a NEXT slot - * - * @kbdev: Device pointer - * @js: Job slot to evict from - * @completion_code: Event code from job that was run. - * - * Evict the atom in the NEXT slot for the specified job slot. This function is - * called from the job complete IRQ handler when the previous job has failed. 
- * - * Return: true if job evicted from NEXT registers, false otherwise - */ -bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, - u32 completion_code); - -/** - * kbase_gpu_complete_hw - Complete an atom on job slot js - * - * @kbdev: Device pointer - * @js: Job slot that has completed - * @completion_code: Event code from job that has completed - * @job_tail: The tail address from the hardware if the job has partially - * completed - * @end_timestamp: Time of completion - */ -void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, - u32 completion_code, - u64 job_tail, - ktime_t *end_timestamp); - -/** - * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer - * - * @kbdev: Device pointer - * @js: Job slot to inspect - * @idx: Index into ringbuffer. 0 is the job currently running on - * the slot, 1 is the job waiting, all other values are invalid. - * Return: The atom at that position in the ringbuffer - * or NULL if no atom present - */ -struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, - int idx); - -/** - * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers - * - * @kbdev: Device pointer - */ -void kbase_gpu_dump_slots(struct kbase_device *kbdev); - -#endif /* _KBASE_HWACCESS_GPU_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_js_backend.c deleted file mode 100755 index 7307be403d44..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_js_backend.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend specific job scheduler APIs - */ - -#include -#include -#include -#include - -/* - * Hold the runpool_mutex for this - */ -static inline bool timer_callback_should_run(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - s8 nr_running_ctxs; - - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - - /* Timer must stop if we are suspending */ - if (backend->suspend_timer) - return false; - - /* nr_contexts_pullable is updated with the runpool_mutex. 
However, the - * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading */ - nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); - -#ifdef CONFIG_MALI_DEBUG - if (kbdev->js_data.softstop_always) { - /* Debug support for allowing soft-stop on a single context */ - return true; - } -#endif /* CONFIG_MALI_DEBUG */ - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { - /* Timeouts would have to be 4x longer (due to micro- - * architectural design) to support OpenCL conformance tests, so - * only run the timer when there's: - * - 2 or more CL contexts - * - 1 or more GLES contexts - * - * NOTE: We will treat a context that has both Compute and Non- - * Compute jobs will be treated as an OpenCL context (hence, we - * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). - */ - { - s8 nr_compute_ctxs = - kbasep_js_ctx_attr_count_on_runpool(kbdev, - KBASEP_JS_CTX_ATTR_COMPUTE); - s8 nr_noncompute_ctxs = nr_running_ctxs - - nr_compute_ctxs; - - return (bool) (nr_compute_ctxs >= 2 || - nr_noncompute_ctxs > 0); - } - } else { - /* Run the timer callback whenever you have at least 1 context - */ - return (bool) (nr_running_ctxs > 0); - } -} - -static enum hrtimer_restart timer_callback(struct hrtimer *timer) -{ - unsigned long flags; - struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; - struct kbase_backend_data *backend; - int s; - bool reset_needed = false; - - KBASE_DEBUG_ASSERT(timer != NULL); - - backend = container_of(timer, struct kbase_backend_data, - scheduling_timer); - kbdev = container_of(backend, struct kbase_device, hwaccess.backend); - js_devdata = &kbdev->js_data; - - /* Loop through the slots */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { - struct kbase_jd_atom *atom = NULL; - - if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { - atom = kbase_gpu_inspect(kbdev, s, 0); - KBASE_DEBUG_ASSERT(atom != NULL); - } - - if (atom != NULL) { - /* The current version of the model doesn't support - * Soft-Stop */ - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { - u32 ticks = atom->ticks++; - -#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) - u32 soft_stop_ticks, hard_stop_ticks, - gpu_reset_ticks; - if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - soft_stop_ticks = - js_devdata->soft_stop_ticks_cl; - hard_stop_ticks = - js_devdata->hard_stop_ticks_cl; - gpu_reset_ticks = - js_devdata->gpu_reset_ticks_cl; - } else { - soft_stop_ticks = - js_devdata->soft_stop_ticks; - hard_stop_ticks = - js_devdata->hard_stop_ticks_ss; - gpu_reset_ticks = - js_devdata->gpu_reset_ticks_ss; - } - - /* If timeouts have been changed then ensure - * that atom tick count is not greater than the - * new soft_stop timeout. This ensures that - * atoms do not miss any of the timeouts due to - * races between this worker and the thread - * changing the timeouts. */ - if (backend->timeouts_updated && - ticks > soft_stop_ticks) - ticks = atom->ticks = soft_stop_ticks; - - /* Job is Soft-Stoppable */ - if (ticks == soft_stop_ticks) { - /* Job has been scheduled for at least - * js_devdata->soft_stop_ticks ticks. - * Soft stop the slot so we can run - * other jobs. 
- */ -#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS - int disjoint_threshold = - KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; - u32 softstop_flags = 0u; - - dev_dbg(kbdev->dev, "Soft-stop"); - /* nr_user_contexts_running is updated - * with the runpool_mutex, but we can't - * take that here. - * - * However, if it's about to be - * increased then the new context can't - * run any jobs until they take the - * hwaccess_lock, so it's OK to observe - * the older value. - * - * Similarly, if it's about to be - * decreased, the last job from another - * context has already finished, so it's - * not too bad that we observe the older - * value and register a disjoint event - * when we try soft-stopping */ - if (js_devdata->nr_user_contexts_running - >= disjoint_threshold) - softstop_flags |= - JS_COMMAND_SW_CAUSES_DISJOINT; - - kbase_job_slot_softstop_swflags(kbdev, - s, atom, softstop_flags); -#endif - } else if (ticks == hard_stop_ticks) { - /* Job has been scheduled for at least - * js_devdata->hard_stop_ticks_ss ticks. - * It should have been soft-stopped by - * now. Hard stop the slot. - */ -#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS - int ms = - js_devdata->scheduling_period_ns - / 1000000u; - dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", - (unsigned long)ticks, - (unsigned long)ms); - kbase_job_slot_hardstop(atom->kctx, s, - atom); -#endif - } else if (ticks == gpu_reset_ticks) { - /* Job has been scheduled for at least - * js_devdata->gpu_reset_ticks_ss ticks. - * It should have left the GPU by now. - * Signal that the GPU needs to be - * reset. - */ - reset_needed = true; - } -#else /* !CONFIG_MALI_JOB_DUMP */ - /* NOTE: During CONFIG_MALI_JOB_DUMP, we use - * the alternate timeouts, which makes the hard- - * stop and GPU reset timeout much longer. We - * also ensure that we don't soft-stop at all. - */ - if (ticks == js_devdata->soft_stop_ticks) { - /* Job has been scheduled for at least - * js_devdata->soft_stop_ticks. We do - * not soft-stop during - * CONFIG_MALI_JOB_DUMP, however. - */ - dev_dbg(kbdev->dev, "Soft-stop"); - } else if (ticks == - js_devdata->hard_stop_ticks_dumping) { - /* Job has been scheduled for at least - * js_devdata->hard_stop_ticks_dumping - * ticks. Hard stop the slot. - */ -#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS - int ms = - js_devdata->scheduling_period_ns - / 1000000u; - dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", - (unsigned long)ticks, - (unsigned long)ms); - kbase_job_slot_hardstop(atom->kctx, s, - atom); -#endif - } else if (ticks == - js_devdata->gpu_reset_ticks_dumping) { - /* Job has been scheduled for at least - * js_devdata->gpu_reset_ticks_dumping - * ticks. It should have left the GPU by - * now. Signal that the GPU needs to be - * reset. - */ - reset_needed = true; - } -#endif /* !CONFIG_MALI_JOB_DUMP */ - } - } - } - if (reset_needed) { - dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issueing GPU soft-reset to resolve."); - - if (kbase_prepare_to_reset_gpu_locked(kbdev)) - kbase_reset_gpu_locked(kbdev); - } - /* the timer is re-issued if there is contexts in the run-pool */ - - if (backend->timer_running) - hrtimer_start(&backend->scheduling_timer, - HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), - HRTIMER_MODE_REL); - - backend->timeouts_updated = false; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return HRTIMER_NORESTART; -} - -void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - unsigned long flags; - - lockdep_assert_held(&js_devdata->runpool_mutex); - - if (!timer_callback_should_run(kbdev)) { - /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - backend->timer_running = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /* From now on, return value of timer_callback_should_run() will - * also cause the timer to not requeue itself. Its return value - * cannot change, because it depends on variables updated with - * the runpool_mutex held, which the caller of this must also - * hold */ - hrtimer_cancel(&backend->scheduling_timer); - } - - if (timer_callback_should_run(kbdev) && !backend->timer_running) { - /* Take spinlock to force synchronisation with timer */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - backend->timer_running = true; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - hrtimer_start(&backend->scheduling_timer, - HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), - HRTIMER_MODE_REL); - - KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, - 0u); - } -} - -int kbase_backend_timer_init(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - - hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - backend->scheduling_timer.function = timer_callback; - - backend->timer_running = false; - - return 0; -} - -void kbase_backend_timer_term(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - - hrtimer_cancel(&backend->scheduling_timer); -} - -void kbase_backend_timer_suspend(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - - backend->suspend_timer = true; - - kbase_backend_ctx_count_changed(kbdev); -} - -void kbase_backend_timer_resume(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - - backend->suspend_timer = false; - - kbase_backend_ctx_count_changed(kbdev); -} - -void kbase_backend_timeouts_changed(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - - backend->timeouts_updated = true; -} - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_js_internal.h deleted file mode 100755 index 6576e55d2e39..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_js_internal.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
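For reference, the scheduling timer removed above follows the standard kernel hrtimer shape: the callback returns HRTIMER_NORESTART and only re-arms itself while a flag, updated under the driver's spinlock, says it should keep running. A minimal sketch of that pattern, with hypothetical names rather than kbase symbols (locking elided for brevity):

#include <linux/hrtimer.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/types.h>

struct demo_backend {
	struct hrtimer scheduling_timer;
	bool timer_running;	/* written under the driver's spinlock */
	u64 period_ns;
};

static enum hrtimer_restart demo_timer_cb(struct hrtimer *timer)
{
	struct demo_backend *b =
		container_of(timer, struct demo_backend, scheduling_timer);

	/* ... per-slot tick accounting would go here ... */

	/* Re-arm manually instead of returning HRTIMER_RESTART, so the
	 * "keep ticking?" decision stays tied to timer_running. */
	if (b->timer_running)
		hrtimer_start(&b->scheduling_timer,
			      ns_to_ktime(b->period_ns), HRTIMER_MODE_REL);

	return HRTIMER_NORESTART;
}

static void demo_timer_init(struct demo_backend *b, u64 period_ns)
{
	b->period_ns = period_ns;
	b->timer_running = false;
	hrtimer_init(&b->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	b->scheduling_timer.function = demo_timer_cb;
}

The removed kbase_backend_timer_term() and timer_suspend() paths then simply call hrtimer_cancel(), or clear the flag and let kbase_backend_ctx_count_changed() decide whether to restart it.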
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Register-based HW access backend specific job scheduler APIs - */ - -#ifndef _KBASE_JS_BACKEND_H_ -#define _KBASE_JS_BACKEND_H_ - -/** - * kbase_backend_timer_init() - Initialise the JS scheduling timer - * @kbdev: Device pointer - * - * This function should be called at driver initialisation - * - * Return: 0 on success - */ -int kbase_backend_timer_init(struct kbase_device *kbdev); - -/** - * kbase_backend_timer_term() - Terminate the JS scheduling timer - * @kbdev: Device pointer - * - * This function should be called at driver termination - */ -void kbase_backend_timer_term(struct kbase_device *kbdev); - -/** - * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling - * timer - * @kbdev: Device pointer - * - * This function should be called on suspend, after the active count has reached - * zero. This is required as the timer may have been started on job submission - * to the job scheduler, but before jobs are submitted to the GPU. - * - * Caller must hold runpool_mutex. - */ -void kbase_backend_timer_suspend(struct kbase_device *kbdev); - -/** - * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS - * scheduling timer - * @kbdev: Device pointer - * - * This function should be called on resume. Note that is is not guaranteed to - * re-start the timer, only evalute whether it should be re-started. - * - * Caller must hold runpool_mutex. - */ -void kbase_backend_timer_resume(struct kbase_device *kbdev); - -#endif /* _KBASE_JS_BACKEND_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c deleted file mode 100755 index ba5bf721e523..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ /dev/null @@ -1,400 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include - -#include -#include -#include -#include -#include -#include - -static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, - u32 num_pages) -{ - u64 region; - - /* can't lock a zero sized range */ - KBASE_DEBUG_ASSERT(num_pages); - - region = pfn << PAGE_SHIFT; - /* - * fls returns (given the ASSERT above): - * 1 .. 32 - * - * 10 + fls(num_pages) - * results in the range (11 .. 
42) - */ - - /* gracefully handle num_pages being zero */ - if (0 == num_pages) { - region |= 11; - } else { - u8 region_width; - - region_width = 10 + fls(num_pages); - if (num_pages != (1ul << (region_width - 11))) { - /* not pow2, so must go up to the next pow2 */ - region_width += 1; - } - KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE); - KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE); - region |= region_width; - } - - return region; -} - -static int wait_ready(struct kbase_device *kbdev, - unsigned int as_nr) -{ - unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; - u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); - - /* Wait for the MMU status to indicate there is no active command, in - * case one is pending. Do not log remaining register accesses. */ - while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) - val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); - - if (max_loops == 0) { - dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n"); - return -1; - } - - /* If waiting in loop was performed, log last read value. */ - if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) - kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); - - return 0; -} - -static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) -{ - int status; - - /* write AS_COMMAND when MMU is ready to accept another command */ - status = wait_ready(kbdev, as_nr); - if (status == 0) - kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); - - return status; -} - -static void validate_protected_page_fault(struct kbase_device *kbdev) -{ - /* GPUs which support (native) protected mode shall not report page - * fault addresses unless it has protected debug mode and protected - * debug mode is turned on */ - u32 protected_debug_mode = 0; - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) - return; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - protected_debug_mode = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; - } - - if (!protected_debug_mode) { - /* fault_addr should never be reported in protected mode. - * However, we just continue by printing an error message */ - dev_err(kbdev->dev, "Fault address reported in protected mode\n"); - } -} - -void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) -{ - const int num_as = 16; - const int busfault_shift = MMU_PAGE_FAULT_FLAGS; - const int pf_shift = 0; - const unsigned long as_bit_mask = (1UL << num_as) - 1; - unsigned long flags; - u32 new_mask; - u32 tmp; - - /* bus faults */ - u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; - /* page faults (note: Ignore ASes with both pf and bf) */ - u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - - /* remember current mask */ - spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); - /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); - - while (bf_bits | pf_bits) { - struct kbase_as *as; - int as_no; - struct kbase_context *kctx; - struct kbase_fault *fault; - - /* - * the while logic ensures we have a bit set, no need to check - * for not-found here - */ - as_no = ffs(bf_bits | pf_bits) - 1; - as = &kbdev->as[as_no]; - - /* find the fault type */ - as->fault_type = (bf_bits & (1 << as_no)) ? 
- KBASE_MMU_FAULT_TYPE_BUS : - KBASE_MMU_FAULT_TYPE_PAGE; - - if (kbase_as_has_bus_fault(as)) - fault = &as->bf_data; - else - fault = &as->pf_data; - - /* - * Refcount the kctx ASAP - it shouldn't disappear anyway, since - * Bus/Page faults _should_ only occur whilst jobs are running, - * and a job causing the Bus/Page fault shouldn't complete until - * the MMU is updated - */ - kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); - - /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); - fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); - - /* Mark the fault protected or not */ - fault->protected_mode = kbdev->protected_mode; - - if (kbdev->protected_mode && fault->addr) { - /* check if address reporting is allowed */ - validate_protected_page_fault(kbdev); - } - - /* report the fault to debugfs */ - kbase_as_fault_debugfs_new(kbdev, as_no); - - /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); - } - - if (kbase_as_has_bus_fault(as)) { - /* Mark bus fault as handled. - * Note that a bus fault is processed first in case - * where both a bus fault and page fault occur. - */ - bf_bits &= ~(1UL << as_no); - - /* remove the queued BF (and PF) from the mask */ - new_mask &= ~(MMU_BUS_ERROR(as_no) | - MMU_PAGE_FAULT(as_no)); - } else { - /* Mark page fault as handled */ - pf_bits &= ~(1UL << as_no); - - /* remove the queued PF from the mask */ - new_mask &= ~MMU_PAGE_FAULT(as_no); - } - - /* Process the interrupt for this address space */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_interrupt_process(kbdev, kctx, as, fault); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - - /* reenable interrupts */ - spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); - new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); - spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -} - -void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) -{ - struct kbase_mmu_setup *current_setup = &as->current_setup; - u64 transcfg = 0; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - transcfg = current_setup->transcfg; - - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ - /* Clear PTW_MEMATTR bits */ - transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; - /* Enable correct PTW_MEMATTR bits */ - transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; - /* Ensure page-tables reads use read-allocate cache-policy in - * the L2 - */ - transcfg |= AS_TRANSCFG_R_ALLOCATE; - - if (kbdev->system_coherency == COHERENCY_ACE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ - /* Clear PTW_SH bits */ - transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); - /* Enable correct PTW_SH bits */ - transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); - } - - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (transcfg >> 32) & 0xFFFFFFFFUL); - } else { - if (kbdev->system_coherency == COHERENCY_ACE) - current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; - } - - kbase_reg_write(kbdev, 
MMU_AS_REG(as->number, AS_TRANSTAB_LO), - current_setup->transtab & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), - (current_setup->transtab >> 32) & 0xFFFFFFFFUL); - - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), - current_setup->memattr & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), - (current_setup->memattr >> 32) & 0xFFFFFFFFUL); - - KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, - current_setup->transtab, - current_setup->memattr, - transcfg); - - write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); -} - -int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, - u64 vpfn, u32 nr, u32 op, - unsigned int handling_irq) -{ - int ret; - - lockdep_assert_held(&kbdev->mmu_hw_mutex); - - if (op == AS_COMMAND_UNLOCK) { - /* Unlock doesn't require a lock first */ - ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); - } else { - u64 lock_addr = lock_region(kbdev, vpfn, nr); - - /* Lock the region that needs to be updated */ - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), - lock_addr & 0xFFFFFFFFUL); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), - (lock_addr >> 32) & 0xFFFFFFFFUL); - write_cmd(kbdev, as->number, AS_COMMAND_LOCK); - - /* Run the MMU operation */ - write_cmd(kbdev, as->number, op); - - /* Wait for the flush to complete */ - ret = wait_ready(kbdev, as->number); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { - /* Issue an UNLOCK command to ensure that valid page - tables are re-read by the GPU after an update. - Note that, the FLUSH command should perform all the - actions necessary, however the bus logs show that if - multiple page faults occur within an 8 page region - the MMU does not always re-read the updated page - table entries for later faults or is only partially - read, it subsequently raises the page fault IRQ for - the same addresses, the unlock ensures that the MMU - cache is flushed, so updates can be re-read. As the - region is now unlocked we need to issue 2 UNLOCK - commands in order to flush the MMU/uTLB, - see PRLAM-8812. - */ - write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); - write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); - } - } - - return ret; -} - -void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type) -{ - unsigned long flags; - u32 pf_bf_mask; - - spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - - /* - * A reset is in-flight and we're flushing the IRQ + bottom half - * so don't update anything as it could race with the reset code. - */ - if (kbdev->irq_reset_flush) - goto unlock; - - /* Clear the page (and bus fault IRQ as well in case one occurred) */ - pf_bf_mask = MMU_PAGE_FAULT(as->number); - if (type == KBASE_MMU_FAULT_TYPE_BUS || - type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) - pf_bf_mask |= MMU_BUS_ERROR(as->number); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); - -unlock: - spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -} - -void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type) -{ - unsigned long flags; - u32 irq_mask; - - /* Enable the page fault IRQ (and bus fault IRQ as well in case one - * occurred) */ - spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - - /* - * A reset is in-flight and we're flushing the IRQ + bottom half - * so don't update anything as it could race with the reset code. 
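The lock_region() helper used by kbase_mmu_hw_do_operation() above packs the page-aligned base address together with a small width field in the low bits, computed from the number of pages. A standalone restatement of that arithmetic (hypothetical name, assuming the usual PAGE_SHIFT):

#include <linux/bitops.h>
#include <linux/mm.h>	/* PAGE_SHIFT */
#include <linux/types.h>

static u64 demo_lock_region(u64 pfn, u32 num_pages)
{
	u64 region = pfn << PAGE_SHIFT;	/* page-aligned base address */
	u8 width;

	if (num_pages == 0)
		return region | 11;	/* hardware minimum width */

	/* fls() is floor(log2(num_pages)) + 1, so this starts at 11 for a
	 * single page and grows with the region size. */
	width = 10 + fls(num_pages);

	/* Round up when num_pages is not a power of two, so the encoded
	 * region still covers the whole range being locked. */
	if (num_pages != (1UL << (width - 11)))
		width += 1;

	return region | width;
}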
- */ - if (kbdev->irq_reset_flush) - goto unlock; - - irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | - MMU_PAGE_FAULT(as->number); - - if (type == KBASE_MMU_FAULT_TYPE_BUS || - type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) - irq_mask |= MMU_BUS_ERROR(as->number); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); - -unlock: - spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h deleted file mode 100755 index 1f76eeda2324..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Interface file for the direct implementation for MMU hardware access - * - * Direct MMU hardware interface - * - * This module provides the interface(s) that are required by the direct - * register access implementation of the MMU hardware interface - */ - -#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ -#define _MALI_KBASE_MMU_HW_DIRECT_H_ - -#include - -/** - * kbase_mmu_interrupt - Process an MMU interrupt. - * - * Process the MMU interrupt that was reported by the &kbase_device. - * - * @kbdev: kbase context to clear the fault from. - * @irq_stat: Value of the MMU_IRQ_STATUS register - */ -void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); - -#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_always_on.c deleted file mode 100755 index 51a10a231df0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_always_on.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
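kbase_mmu_interrupt(), removed above, walks the pending fault bits with ffs(), handling a bus fault in preference to a page fault on the same address space. A condensed sketch of that walk, assuming (as the removed code suggests) that the bus-fault flags sit 16 bits above the page-fault flags:

#include <linux/bitops.h>
#include <linux/types.h>

static void demo_dispatch_faults(u32 irq_stat)
{
	const unsigned long as_mask = (1UL << 16) - 1;	/* 16 address spaces */
	u32 bf_bits = (irq_stat >> 16) & as_mask;	/* bus faults */
	u32 pf_bits = (irq_stat & as_mask) & ~bf_bits;	/* page faults only */

	while (bf_bits | pf_bits) {
		int as_no = ffs(bf_bits | pf_bits) - 1;
		bool is_bus_fault = bf_bits & (1u << as_no);

		/* ... read fault address/status and handle the fault for
		 * address space as_no ... */

		if (is_bus_fault)
			bf_bits &= ~(1u << as_no);
		else
			pf_bits &= ~(1u << as_no);
	}
}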
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * "Always on" power management policy - */ - -#include -#include - -static bool always_on_shaders_needed(struct kbase_device *kbdev) -{ - return true; -} - -static bool always_on_get_core_active(struct kbase_device *kbdev) -{ - return true; -} - -static void always_on_init(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static void always_on_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -/* - * The struct kbase_pm_policy structure for the demand power policy. - * - * This is the static structure that defines the demand power policy's callback - * and name. - */ -const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { - "always_on", /* name */ - always_on_init, /* init */ - always_on_term, /* term */ - always_on_shaders_needed, /* shaders_needed */ - always_on_get_core_active, /* get_core_active */ - 0u, /* flags */ - KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ -}; - -KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_always_on.h deleted file mode 100755 index e7927cf82e5a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_always_on.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * "Always on" power management policy - */ - -#ifndef MALI_KBASE_PM_ALWAYS_ON_H -#define MALI_KBASE_PM_ALWAYS_ON_H - -/** - * DOC: - * The "Always on" power management policy has the following - * characteristics: - * - * - When KBase indicates that the GPU will be powered up, but we don't yet - * know which Job Chains are to be run: - * Shader Cores are powered up, regardless of whether or not they will be - * needed later. - * - * - When KBase indicates that Shader Cores are needed to submit the currently - * queued Job Chains: - * Shader Cores are kept powered, regardless of whether or not they will be - * needed - * - * - When KBase indicates that the GPU need not be powered: - * The Shader Cores are kept powered, regardless of whether or not they will - * be needed. The GPU itself is also kept powered, even though it is not - * needed. - * - * This policy is automatically overridden during system suspend: the desired - * core state is ignored, and the cores are forced off regardless of what the - * policy requests. After resuming from suspend, new changes to the desired - * core state made by the policy are honored. - * - * Note: - * - * - KBase indicates the GPU will be powered up when it has a User Process that - * has just started to submit Job Chains. 
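Each of the removed power policies (always_on here, coarse_demand later in this patch) is essentially a const table of callbacks plus an ID. The general shape, with hypothetical types and names:

#include <linux/types.h>

struct demo_device;	/* opaque device type for the sketch */

struct demo_pm_policy {
	const char *name;
	void (*init)(struct demo_device *dev);
	void (*term)(struct demo_device *dev);
	bool (*shaders_needed)(struct demo_device *dev);
	bool (*get_core_active)(struct demo_device *dev);
	u64 flags;
	int id;
};

static void demo_noop(struct demo_device *dev) { }
static bool demo_yes(struct demo_device *dev) { return true; }

/* "always on": report the cores as needed/active whenever asked. */
static const struct demo_pm_policy demo_always_on_ops = {
	.name = "always_on",
	.init = demo_noop,
	.term = demo_noop,
	.shaders_needed = demo_yes,
	.get_core_active = demo_yes,
	.flags = 0,
	.id = 0,
};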
- * - * - KBase indicates the GPU need not be powered when all the Job Chains from - * User Processes have finished, and it is waiting for a User Process to - * submit some more Job Chains. - */ - -/** - * struct kbasep_pm_policy_always_on - Private struct for policy instance data - * @dummy: unused dummy variable - * - * This contains data that is private to the particular power policy that is - * active. - */ -struct kbasep_pm_policy_always_on { - int dummy; -}; - -extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; - -#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_backend.c deleted file mode 100755 index c19a0d134696..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_backend.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * GPU backend implementation of base kernel power management APIs - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); -static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); - -int kbase_pm_runtime_init(struct kbase_device *kbdev) -{ - struct kbase_pm_callback_conf *callbacks; - - callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; - if (callbacks) { - kbdev->pm.backend.callback_power_on = - callbacks->power_on_callback; - kbdev->pm.backend.callback_power_off = - callbacks->power_off_callback; - kbdev->pm.backend.callback_power_suspend = - callbacks->power_suspend_callback; - kbdev->pm.backend.callback_power_resume = - callbacks->power_resume_callback; - kbdev->pm.callback_power_runtime_init = - callbacks->power_runtime_init_callback; - kbdev->pm.callback_power_runtime_term = - callbacks->power_runtime_term_callback; - kbdev->pm.backend.callback_power_runtime_on = - callbacks->power_runtime_on_callback; - kbdev->pm.backend.callback_power_runtime_off = - callbacks->power_runtime_off_callback; - kbdev->pm.backend.callback_power_runtime_idle = - callbacks->power_runtime_idle_callback; - - if (callbacks->power_runtime_init_callback) - return callbacks->power_runtime_init_callback(kbdev); - else - return 0; - } - - kbdev->pm.backend.callback_power_on = NULL; - kbdev->pm.backend.callback_power_off = NULL; - kbdev->pm.backend.callback_power_suspend = NULL; - kbdev->pm.backend.callback_power_resume = NULL; - kbdev->pm.callback_power_runtime_init = NULL; - kbdev->pm.callback_power_runtime_term = NULL; - kbdev->pm.backend.callback_power_runtime_on = NULL; - kbdev->pm.backend.callback_power_runtime_off = NULL; - kbdev->pm.backend.callback_power_runtime_idle 
= NULL; - - return 0; -} - -void kbase_pm_runtime_term(struct kbase_device *kbdev) -{ - if (kbdev->pm.callback_power_runtime_term) { - kbdev->pm.callback_power_runtime_term(kbdev); - } -} - -void kbase_pm_register_access_enable(struct kbase_device *kbdev) -{ - struct kbase_pm_callback_conf *callbacks; - - callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; - - if (callbacks) - callbacks->power_on_callback(kbdev); - - kbdev->pm.backend.gpu_powered = true; -} - -void kbase_pm_register_access_disable(struct kbase_device *kbdev) -{ - struct kbase_pm_callback_conf *callbacks; - - callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; - - if (callbacks) - callbacks->power_off_callback(kbdev); - - kbdev->pm.backend.gpu_powered = false; -} - -int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev) -{ - int ret = 0; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - mutex_init(&kbdev->pm.lock); - - kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", - WQ_HIGHPRI | WQ_UNBOUND, 1); - if (!kbdev->pm.backend.gpu_poweroff_wait_wq) - return -ENOMEM; - - INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, - kbase_pm_gpu_poweroff_wait_wq); - - kbdev->pm.backend.ca_cores_enabled = ~0ull; - kbdev->pm.backend.gpu_powered = false; - kbdev->pm.suspending = false; -#ifdef CONFIG_MALI_DEBUG - kbdev->pm.backend.driver_ready_for_irqs = false; -#endif /* CONFIG_MALI_DEBUG */ - init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); - - /* Initialise the metrics subsystem */ - ret = kbasep_pm_metrics_init(kbdev); - if (ret) - return ret; - - init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); - kbdev->pm.backend.reset_done = false; - - init_waitqueue_head(&kbdev->pm.zero_active_count_wait); - kbdev->pm.active_count = 0; - - spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); - spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); - - init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); - - if (kbase_pm_ca_init(kbdev) != 0) - goto workq_fail; - - if (kbase_pm_policy_init(kbdev) != 0) - goto pm_policy_fail; - - if (kbase_pm_state_machine_init(kbdev) != 0) - goto pm_state_machine_fail; - - return 0; - -pm_state_machine_fail: - kbase_pm_policy_term(kbdev); -pm_policy_fail: - kbase_pm_ca_term(kbdev); -workq_fail: - kbasep_pm_metrics_term(kbdev); - return -EINVAL; -} - -int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - kbdev->pm.backend.hwcnt_desired = false; - kbdev->pm.backend.hwcnt_disabled = true; - INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, - kbase_pm_hwcnt_disable_worker); - kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - - return 0; -} - -void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) -{ - lockdep_assert_held(&kbdev->pm.lock); - - /* Turn clocks and interrupts on - no-op if we haven't done a previous - * kbase_pm_clock_off() */ - kbase_pm_clock_on(kbdev, is_resume); - - if (!is_resume) { - unsigned long flags; - - /* Force update of L2 state - if we have abandoned a power off - * then this may be required to power the L2 back on. 
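kbase_hwaccess_pm_early_init(), removed above, allocates a dedicated high-priority, unbound workqueue with a max_active of one for the power-off wait work. The underlying pattern, with hypothetical names:

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct demo_pm {
	struct workqueue_struct *poweroff_wq;
	struct work_struct poweroff_work;
};

static void demo_poweroff_worker(struct work_struct *work)
{
	struct demo_pm *pm = container_of(work, struct demo_pm, poweroff_work);

	/* ... wait for the hardware to reach the desired state, then turn
	 * the clock off, as the removed worker does ... */
	(void)pm;
}

static int demo_pm_init(struct demo_pm *pm)
{
	pm->poweroff_wq = alloc_workqueue("demo_pm_poweroff",
					  WQ_HIGHPRI | WQ_UNBOUND, 1);
	if (!pm->poweroff_wq)
		return -ENOMEM;

	INIT_WORK(&pm->poweroff_work, demo_poweroff_worker);
	return 0;
}

/* later: queue_work(pm->poweroff_wq, &pm->poweroff_work);
 * and destroy_workqueue(pm->poweroff_wq) on teardown. */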
- */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - - /* Update core status as required by the policy */ - kbase_pm_update_cores_state(kbdev); - - /* NOTE: We don't wait to reach the desired state, since running atoms - * will wait for that state to be reached anyway */ -} - -static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) -{ - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.gpu_poweroff_wait_work); - struct kbase_pm_device_data *pm = &kbdev->pm; - struct kbase_pm_backend_data *backend = &pm->backend; - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - unsigned long flags; - - if (!platform_power_down_only) - /* Wait for power transitions to complete. We do this with no locks held - * so that we don't deadlock with any pending workqueues. - */ - kbase_pm_wait_for_desired_state(kbdev); - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - if (!backend->poweron_required) { - if (!platform_power_down_only) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - WARN_ON(backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF || - backend->l2_state != KBASE_L2_OFF); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - - /* Disable interrupts and turn the clock off */ - if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) { - /* - * Page/bus faults are pending, must drop locks to - * process. Interrupts are disabled so no more faults - * should be generated at this point. - */ - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - kbase_flush_mmu_wqs(kbdev); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - /* Turn off clock now that fault have been handled. We - * dropped locks so poweron_required may have changed - - * power back on if this is the case (effectively only - * re-enabling of the interrupts would be done in this - * case, as the clocks to GPU were not withdrawn yet). 
- */ - if (backend->poweron_required) - kbase_pm_clock_on(kbdev, false); - else - WARN_ON(!kbase_pm_clock_off(kbdev, - backend->poweroff_is_suspend)); - } - } - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - backend->poweroff_wait_in_progress = false; - if (backend->poweron_required) { - backend->poweron_required = false; - kbdev->pm.backend.l2_desired = true; - kbase_pm_update_state(kbdev); - kbase_pm_update_cores_state_nolock(kbdev); - kbase_backend_slot_update(kbdev); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - - wake_up(&kbdev->pm.backend.poweroff_wait); -} - -static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) -{ - struct kbase_device *kbdev = container_of(data, struct kbase_device, - pm.backend.hwcnt_disable_work); - struct kbase_pm_device_data *pm = &kbdev->pm; - struct kbase_pm_backend_data *backend = &pm->backend; - unsigned long flags; - - bool do_disable; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (!do_disable) - return; - - kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; - - if (do_disable) { - /* PM state did not change while we were doing the disable, - * so commit the work we just performed and continue the state - * machine. - */ - backend->hwcnt_disabled = true; - kbase_pm_update_state(kbdev); - } else { - /* PM state was updated while we were doing the disable, - * so we need to undo the disable we just performed. - */ - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) -{ - unsigned long flags; - - lockdep_assert_held(&kbdev->pm.lock); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - spin_lock(&kbdev->pm.backend.gpu_powered_lock); - if (!kbdev->pm.backend.gpu_powered) { - spin_unlock(&kbdev->pm.backend.gpu_powered_lock); - goto unlock_hwaccess; - } else { - spin_unlock(&kbdev->pm.backend.gpu_powered_lock); - } - - if (kbdev->pm.backend.poweroff_wait_in_progress) - goto unlock_hwaccess; - - /* Force all cores off */ - kbdev->pm.backend.shaders_desired = false; - kbdev->pm.backend.l2_desired = false; - - kbdev->pm.backend.poweroff_wait_in_progress = true; - kbdev->pm.backend.poweroff_is_suspend = is_suspend; - kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; - - /* l2_desired being false should cause the state machine to - * start powering off the L2. When it actually is powered off, - * the interrupt handler will call kbase_pm_l2_update_state() - * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. - * Callers of this function will need to wait on poweroff_wait. 
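kbase_pm_hwcnt_disable_worker(), just above, is a compact example of checking a condition under the spinlock, doing the slow work with the lock dropped, and then re-checking before committing, so a PM state change that raced with the worker is undone rather than clobbered. A stripped-down sketch:

#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_pm {
	spinlock_t lock;
	bool want_counters;	/* desired state, changed under lock */
	bool counters_off;	/* committed state */
};

static void demo_disable_worker(struct demo_pm *pm)
{
	unsigned long flags;
	bool do_disable;

	spin_lock_irqsave(&pm->lock, flags);
	do_disable = !pm->want_counters && !pm->counters_off;
	spin_unlock_irqrestore(&pm->lock, flags);

	if (!do_disable)
		return;

	/* slow operation performed without the lock held */
	/* demo_counters_disable(); */

	spin_lock_irqsave(&pm->lock, flags);
	if (!pm->want_counters && !pm->counters_off) {
		/* nothing changed meanwhile: commit the result */
		pm->counters_off = true;
	} else {
		/* state changed while unlocked: undo the disable */
		/* demo_counters_enable(); */
	}
	spin_unlock_irqrestore(&pm->lock, flags);
}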
- */ - kbase_pm_update_state(kbdev); - -unlock_hwaccess: - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) -{ - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); -} - -int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, - unsigned int flags) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - unsigned long irq_flags; - int ret; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - /* A suspend won't happen during startup/insmod */ - KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); - - /* Power up the GPU, don't enable IRQs as we are not ready to receive - * them. */ - ret = kbase_pm_init_hw(kbdev, flags); - if (ret) { - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - return ret; - } - - kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = - kbdev->pm.debug_core_mask[1] = - kbdev->pm.debug_core_mask[2] = - kbdev->gpu_props.props.raw_props.shader_present; - - /* Pretend the GPU is active to prevent a power policy turning the GPU - * cores off */ - kbdev->pm.active_count = 1; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); - /* Ensure cycle counter is off */ - kbdev->pm.backend.gpu_cycle_counter_requests = 0; - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); - - /* We are ready to receive IRQ's now as power policy is set up, so - * enable them now. 
*/ -#ifdef CONFIG_MALI_DEBUG - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags); - kbdev->pm.backend.driver_ready_for_irqs = true; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags); -#endif - kbase_pm_enable_interrupts(kbdev); - - /* Turn on the GPU and any cores needed by the policy */ - kbase_pm_do_poweron(kbdev, false); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - - /* Idle the GPU and/or cores, if the policy wants it to */ - kbase_pm_context_idle(kbdev); - - return 0; -} - -void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - mutex_lock(&kbdev->pm.lock); - kbase_pm_do_poweroff(kbdev, false); - mutex_unlock(&kbdev->pm.lock); -} - -KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); - -void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); - - /* Free any resources the policy allocated */ - kbase_pm_state_machine_term(kbdev); - kbase_pm_policy_term(kbdev); - kbase_pm_ca_term(kbdev); - - /* Shut down the metrics subsystem */ - kbasep_pm_metrics_term(kbdev); - - destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); -} - -void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); - - if (kbdev->pm.backend.hwcnt_disabled) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } -} - -void kbase_pm_power_changed(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - - kbase_backend_slot_update(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask_js0, u64 new_core_mask_js1, - u64 new_core_mask_js2) -{ - kbdev->pm.debug_core_mask[0] = new_core_mask_js0; - kbdev->pm.debug_core_mask[1] = new_core_mask_js1; - kbdev->pm.debug_core_mask[2] = new_core_mask_js2; - kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | - new_core_mask_js2; - - kbase_pm_update_cores_state_nolock(kbdev); -} - -void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) -{ - kbase_pm_update_active(kbdev); -} - -void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) -{ - kbase_pm_update_active(kbdev); -} - -void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - /* Force power off the GPU and all cores (regardless of policy), only - * after the PM active count reaches zero (otherwise, we risk turning it - * off prematurely) */ - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - kbase_pm_do_poweroff(kbdev, true); - - kbase_backend_timer_suspend(kbdev); - - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - - kbase_pm_wait_for_poweroff_complete(kbdev); -} - -void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - kbdev->pm.suspending = false; - kbase_pm_do_poweron(kbdev, true); - - kbase_backend_timer_resume(kbdev); - - 
mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca.c deleted file mode 100755 index 2cb9452d7f60..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Base kernel core availability APIs - */ - -#include -#include -#include - -int kbase_pm_ca_init(struct kbase_device *kbdev) -{ -#ifdef CONFIG_MALI_DEVFREQ - struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; - - if (kbdev->current_core_mask) - pm_backend->ca_cores_enabled = kbdev->current_core_mask; - else - pm_backend->ca_cores_enabled = - kbdev->gpu_props.props.raw_props.shader_present; -#endif - - return 0; -} - -void kbase_pm_ca_term(struct kbase_device *kbdev) -{ -} - -#ifdef CONFIG_MALI_DEVFREQ -void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) -{ - struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (!(core_mask & kbdev->pm.debug_core_mask_all)) { - dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", - core_mask, kbdev->pm.debug_core_mask_all); - goto unlock; - } - - pm_backend->ca_cores_enabled = core_mask; - - kbase_pm_update_state(kbdev); - -unlock: - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", - pm_backend->ca_cores_enabled); -} -#endif - -u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) -{ - struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* All cores must be enabled when instrumentation is in use */ - if (pm_backend->instr_enabled) - return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask_all; - -#ifdef CONFIG_MALI_DEVFREQ - return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; -#else - return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask_all; -#endif -} - -KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); - -void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - kbdev->pm.backend.instr_enabled = true; -} - -void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - kbdev->pm.backend.instr_enabled = false; -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca.h deleted file mode 100755 index 274581d0393a..000000000000 
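kbase_devfreq_set_core_mask() in the removed mali_kbase_pm_ca.c rejects an OPP core mask that has no overlap with the debug core mask, since applying it would leave no permitted shader cores. In outline (hypothetical names):

#include <linux/errno.h>
#include <linux/types.h>

struct demo_pm {
	u64 debug_core_mask_all;	/* cores the user allows */
	u64 ca_cores_enabled;		/* cores the current OPP enables */
};

static int demo_set_opp_core_mask(struct demo_pm *pm, u64 requested)
{
	/* reject masks that would disable every permitted core */
	if (!(requested & pm->debug_core_mask_all))
		return -EINVAL;

	pm->ca_cores_enabled = requested;
	return 0;
}

kbase_pm_ca_get_core_mask() then hands the scheduler roughly the intersection of the present cores, the devfreq-enabled cores and the debug mask.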
--- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Base kernel core availability APIs - */ - -#ifndef _KBASE_PM_CA_H_ -#define _KBASE_PM_CA_H_ - -/** - * kbase_pm_ca_init - Initialize core availability framework - * - * Must be called before calling any other core availability function - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Return: 0 if the core availability framework was successfully initialized, - * -errno otherwise - */ -int kbase_pm_ca_init(struct kbase_device *kbdev); - -/** - * kbase_pm_ca_term - Terminate core availability framework - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_ca_term(struct kbase_device *kbdev); - -/** - * kbase_pm_ca_get_core_mask - Get currently available shaders core mask - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Returns a mask of the currently available shader cores. 
- * Calls into the core availability policy - * - * Return: The bit mask of available cores - */ -u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); - -/** - * kbase_pm_ca_update_core_status - Update core status - * - * @kbdev: The kbase device structure for the device (must be - * a valid pointer) - * @cores_ready: The bit mask of cores ready for job submission - * @cores_transitioning: The bit mask of cores that are transitioning power - * state - * - * Update core availability policy with current core power status - * - * Calls into the core availability policy - */ -void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, - u64 cores_transitioning); - -/** - * kbase_pm_ca_instr_enable - Enable override for instrumentation - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * This overrides the output of the core availability policy, ensuring that all - * cores are available - */ -void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); - -/** - * kbase_pm_ca_instr_disable - Disable override for instrumentation - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * This disables any previously enabled override, and resumes normal policy - * functionality - */ -void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); - -#endif /* _KBASE_PM_CA_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h deleted file mode 100755 index f67ec650c981..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * A core availability policy for use with devfreq, where core masks are - * associated with OPPs. - */ - -#ifndef MALI_KBASE_PM_CA_DEVFREQ_H -#define MALI_KBASE_PM_CA_DEVFREQ_H - -/** - * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy - * - * This contains data that is private to the devfreq core availability - * policy. - * - * @cores_desired: Cores that the policy wants to be available - * @cores_enabled: Cores that the policy is currently returning as available - * @cores_used: Cores currently powered or transitioning - */ -struct kbasep_pm_ca_policy_devfreq { - u64 cores_desired; - u64 cores_enabled; - u64 cores_used; -}; - -extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; - -/** - * kbase_devfreq_set_core_mask - Set core mask for policy to use - * @kbdev: Device pointer - * @core_mask: New core mask - * - * The new core mask will have immediate effect if the GPU is powered, or will - * take effect when it is next powered on. 
- */ -void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); - -#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ - diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c deleted file mode 100755 index e90c44def25e..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * "Coarse Demand" power management policy - */ - -#include -#include - -static bool coarse_demand_shaders_needed(struct kbase_device *kbdev) -{ - return kbase_pm_is_active(kbdev); -} - -static bool coarse_demand_get_core_active(struct kbase_device *kbdev) -{ - return kbase_pm_is_active(kbdev); -} - -static void coarse_demand_init(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static void coarse_demand_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -/* The struct kbase_pm_policy structure for the demand power policy. - * - * This is the static structure that defines the demand power policy's callback - * and name. - */ -const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { - "coarse_demand", /* name */ - coarse_demand_init, /* init */ - coarse_demand_term, /* term */ - coarse_demand_shaders_needed, /* shaders_needed */ - coarse_demand_get_core_active, /* get_core_active */ - 0u, /* flags */ - KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ -}; - -KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h deleted file mode 100755 index 304e5d7fa32d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * "Coarse Demand" power management policy - */ - -#ifndef MALI_KBASE_PM_COARSE_DEMAND_H -#define MALI_KBASE_PM_COARSE_DEMAND_H - -/** - * DOC: - * The "Coarse" demand power management policy has the following - * characteristics: - * - When KBase indicates that the GPU will be powered up, but we don't yet - * know which Job Chains are to be run: - * - Shader Cores are powered up, regardless of whether or not they will be - * needed later. - * - When KBase indicates that Shader Cores are needed to submit the currently - * queued Job Chains: - * - Shader Cores are kept powered, regardless of whether or not they will - * be needed - * - When KBase indicates that the GPU need not be powered: - * - The Shader Cores are powered off, and the GPU itself is powered off too. - * - * @note: - * - KBase indicates the GPU will be powered up when it has a User Process that - * has just started to submit Job Chains. - * - KBase indicates the GPU need not be powered when all the Job Chains from - * User Processes have finished, and it is waiting for a User Process to - * submit some more Job Chains. - */ - -/** - * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand - * policy - * - * This contains data that is private to the coarse demand power policy. - * - * @dummy: Dummy member - no state needed - */ -struct kbasep_pm_policy_coarse_demand { - int dummy; -}; - -extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; - -#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_defs.h deleted file mode 100755 index 0cff22e19d99..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_defs.h +++ /dev/null @@ -1,472 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Backend-specific Power Manager definitions - */ - -#ifndef _KBASE_PM_HWACCESS_DEFS_H_ -#define _KBASE_PM_HWACCESS_DEFS_H_ - -#include "mali_kbase_pm_always_on.h" -#include "mali_kbase_pm_coarse_demand.h" -#if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_pm_always_on_demand.h" -#endif - -/* Forward definition - see mali_kbase.h */ -struct kbase_device; -struct kbase_jd_atom; - -/** - * enum kbase_pm_core_type - The types of core in a GPU. - * - * These enumerated values are used in calls to - * - kbase_pm_get_present_cores() - * - kbase_pm_get_active_cores() - * - kbase_pm_get_trans_cores() - * - kbase_pm_get_ready_cores(). - * - * They specify which type of core should be acted on. These values are set in - * a manner that allows core_type_to_reg() function to be simpler and more - * efficient. 
- * - * @KBASE_PM_CORE_L2: The L2 cache - * @KBASE_PM_CORE_SHADER: Shader cores - * @KBASE_PM_CORE_TILER: Tiler cores - * @KBASE_PM_CORE_STACK: Core stacks - */ -enum kbase_pm_core_type { - KBASE_PM_CORE_L2 = L2_PRESENT_LO, - KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, - KBASE_PM_CORE_TILER = TILER_PRESENT_LO, - KBASE_PM_CORE_STACK = STACK_PRESENT_LO -}; - -/** - * enum kbase_l2_core_state - The states used for the L2 cache & tiler power - * state machine. - * - * @KBASE_L2_OFF: The L2 cache and tiler are off - * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on - * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being - * enabled - * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled - * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being - * disabled - * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off - * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off - * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state - * are unknown - */ -enum kbase_l2_core_state { - KBASE_L2_OFF = 0, - KBASE_L2_PEND_ON, - KBASE_L2_ON_HWCNT_ENABLE, - KBASE_L2_ON, - KBASE_L2_ON_HWCNT_DISABLE, - KBASE_L2_POWER_DOWN, - KBASE_L2_PEND_OFF, - KBASE_L2_RESET_WAIT -}; - -/** - * enum kbase_shader_core_state - The states used for the shaders' state machine. - * - * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have - * been requested to power on - * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been - * requested to power on - * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on - * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to - * power off, but they remain on for the - * duration of the hysteresis timer - * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired - * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are - * off, but the tick timer - * cancellation is still - * pending. - * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power - * states are unknown - */ -enum kbase_shader_core_state { - KBASE_SHADERS_OFF_CORESTACK_OFF = 0, - KBASE_SHADERS_OFF_CORESTACK_PEND_ON, - KBASE_SHADERS_PEND_ON_CORESTACK_ON, - KBASE_SHADERS_ON_CORESTACK_ON, - KBASE_SHADERS_WAIT_OFF_CORESTACK_ON, - KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON, - KBASE_SHADERS_PEND_OFF_CORESTACK_ON, - KBASE_SHADERS_OFF_CORESTACK_PEND_OFF, - KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF, - KBASE_SHADERS_RESET_WAIT -}; - -/** - * struct kbasep_pm_metrics - Metrics data collected for use by the power - * management framework. - * - * @time_busy: number of ns the GPU was busy executing jobs since the - * @time_period_start timestamp. - * @time_idle: number of ns since time_period_start the GPU was not executing - * jobs since the @time_period_start timestamp. - * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that - * if two CL jobs were active for 400ns, this value would be updated - * with 800. - * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that - * if two GL jobs were active for 400ns, this value would be updated - * with 800. 
- */ -struct kbasep_pm_metrics { - u32 time_busy; - u32 time_idle; - u32 busy_cl[2]; - u32 busy_gl; -}; - -/** - * struct kbasep_pm_metrics_state - State required to collect the metrics in - * struct kbasep_pm_metrics - * @time_period_start: time at which busy/idle measurements started - * @gpu_active: true when the GPU is executing jobs. false when - * not. Updated when the job scheduler informs us a job in submitted - * or removed from a GPU slot. - * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. - * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As - * GL jobs never run on slot 2 this slot is not recorded. - * @lock: spinlock protecting the kbasep_pm_metrics_data structure - * @platform_data: pointer to data controlled by platform specific code - * @kbdev: pointer to kbase device for which metrics are collected - * @values: The current values of the power management metrics. The - * kbase_pm_get_dvfs_metrics() function is used to compare these - * current values with the saved values from a previous invocation. - * @timer: timer to regularly make DVFS decisions based on the power - * management metrics. - * @timer_active: boolean indicating @timer is running - * @dvfs_last: values of the PM metrics from the last DVFS tick - * @dvfs_diff: different between the current and previous PM metrics. - */ -struct kbasep_pm_metrics_state { - ktime_t time_period_start; - bool gpu_active; - u32 active_cl_ctx[2]; - u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */ - spinlock_t lock; - - void *platform_data; - struct kbase_device *kbdev; - - struct kbasep_pm_metrics values; - -#ifdef CONFIG_MALI_MIDGARD_DVFS - struct hrtimer timer; - bool timer_active; - struct kbasep_pm_metrics dvfs_last; - struct kbasep_pm_metrics dvfs_diff; -#endif -}; - -/** - * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer - * @wq: Work queue to wait for the timer to stopped - * @work: Work item which cancels the timer - * @timer: Timer for powering off the shader cores - * @configured_interval: Period of GPU poweroff timer - * @configured_ticks: User-configured number of ticks to wait after the shader - * power down request is received before turning off the cores - * @remaining_ticks: Number of remaining timer ticks until shaders are powered off - * @cancel_queued: True if the cancellation work item has been queued. This is - * required to ensure that it is not queued twice, e.g. after - * a reset, which could cause the timer to be incorrectly - * cancelled later by a delayed workitem. - * @needed: Whether the timer should restart itself - */ -struct kbasep_pm_tick_timer_state { - struct workqueue_struct *wq; - struct work_struct work; - struct hrtimer timer; - - ktime_t configured_interval; - unsigned int configured_ticks; - unsigned int remaining_ticks; - - bool cancel_queued; - bool needed; -}; - -union kbase_pm_policy_data { - struct kbasep_pm_policy_always_on always_on; - struct kbasep_pm_policy_coarse_demand coarse_demand; -#if !MALI_CUSTOMER_RELEASE - struct kbasep_pm_policy_always_on_demand always_on_demand; -#endif -}; - -/** - * struct kbase_pm_backend_data - Data stored per device for power management. - * - * This structure contains data for the power management framework. There is one - * instance of this structure per device in the system. - * - * @pm_current_policy: The policy that is currently actively controlling the - * power state. 
- * @pm_policy_data: Private data for current PM policy - * @reset_done: Flag when a reset is complete - * @reset_done_wait: Wait queue to wait for changes to @reset_done - * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter - * users - * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests - * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired - * state according to the L2 and shader power state - * machines - * @gpu_powered: Set to true when the GPU is powered and register - * accesses are possible, false otherwise - * @instr_enabled: Set to true when instrumentation is enabled, - * false otherwise - * @cg1_disabled: Set if the policy wants to keep the second core group - * powered off - * @driver_ready_for_irqs: Debug state indicating whether sufficient - * initialization of the driver has occurred to handle - * IRQs - * @gpu_powered_lock: Spinlock that must be held when writing @gpu_powered or - * accessing @driver_ready_for_irqs - * @metrics: Structure to hold metrics for the GPU - * @shader_tick_timer: Structure to hold the shader poweroff tick timer state - * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. - * hwaccess_lock must be held when accessing - * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state - * machine should invoke the poweroff - * worker after the L2 has turned off. - * @poweron_required: true if a GPU power on is required. Should only be set - * when poweroff_wait_in_progress is true, and therefore the - * GPU can not immediately be powered on. pm.lock must be - * held when accessing - * @poweroff_is_suspend: true if the GPU is being powered off due to a suspend - * request. pm.lock must be held when accessing - * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off - * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq - * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete - * @callback_power_on: Callback when the GPU needs to be turned on. See - * &struct kbase_pm_callback_conf - * @callback_power_off: Callback when the GPU may be turned off. See - * &struct kbase_pm_callback_conf - * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to - * be turned off. See &struct kbase_pm_callback_conf - * @callback_power_resume: Callback when a resume occurs and the GPU needs to - * be turned on. See &struct kbase_pm_callback_conf - * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See - * &struct kbase_pm_callback_conf - * @callback_power_runtime_off: Callback when the GPU may be turned off. See - * &struct kbase_pm_callback_conf - * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See - * &struct kbase_pm_callback_conf - * @ca_cores_enabled: Cores that are currently available - * @l2_state: The current state of the L2 cache state machine. See - * &enum kbase_l2_core_state - * @l2_desired: True if the L2 cache should be powered on by the L2 cache state - * machine - * @shaders_state: The current state of the shader state machine. - * @shaders_avail: This is updated by the state machine when it is in a state - * where it can handle changes to the core availability. This - * is internal to the shader state machine and should *not* be - * modified elsewhere. - * @shaders_desired: True if the PM active count or power policy requires the - * shader cores to be on. 
This is used as an input to the - * shader power state machine. The current state of the - * cores may be different, but there should be transitions in - * progress that will eventually achieve this state (assuming - * that the policy doesn't change its mind in the mean time). - * @in_reset: True if a GPU is resetting and normal power manager operation is - * suspended - * @protected_transition_override : True if a protected mode transition is in - * progress and is overriding power manager - * behaviour. - * @protected_l2_override : Non-zero if the L2 cache is required during a - * protected mode transition. Has no effect if not - * transitioning. - * @hwcnt_desired: True if we want GPU hardware counters to be enabled. - * @hwcnt_disabled: True if GPU hardware counters are not enabled. - * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if - * atomic disable is not possible. - * - * Note: - * During an IRQ, @pm_current_policy can be NULL when the policy is being - * changed with kbase_pm_set_policy(). The change is protected under - * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context - * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will - * re-issue the policy functions that would have been done under IRQ. - */ -struct kbase_pm_backend_data { - const struct kbase_pm_policy *pm_current_policy; - union kbase_pm_policy_data pm_policy_data; - bool reset_done; - wait_queue_head_t reset_done_wait; - int gpu_cycle_counter_requests; - spinlock_t gpu_cycle_counter_requests_lock; - - wait_queue_head_t gpu_in_desired_state_wait; - - bool gpu_powered; - - bool instr_enabled; - - bool cg1_disabled; - -#ifdef CONFIG_MALI_DEBUG - bool driver_ready_for_irqs; -#endif /* CONFIG_MALI_DEBUG */ - - spinlock_t gpu_powered_lock; - - struct kbasep_pm_metrics_state metrics; - - struct kbasep_pm_tick_timer_state shader_tick_timer; - - bool poweroff_wait_in_progress; - bool invoke_poweroff_wait_wq_when_l2_off; - bool poweron_required; - bool poweroff_is_suspend; - - struct workqueue_struct *gpu_poweroff_wait_wq; - struct work_struct gpu_poweroff_wait_work; - - wait_queue_head_t poweroff_wait; - - int (*callback_power_on)(struct kbase_device *kbdev); - void (*callback_power_off)(struct kbase_device *kbdev); - void (*callback_power_suspend)(struct kbase_device *kbdev); - void (*callback_power_resume)(struct kbase_device *kbdev); - int (*callback_power_runtime_on)(struct kbase_device *kbdev); - void (*callback_power_runtime_off)(struct kbase_device *kbdev); - int (*callback_power_runtime_idle)(struct kbase_device *kbdev); - - u64 ca_cores_enabled; - - enum kbase_l2_core_state l2_state; - enum kbase_shader_core_state shaders_state; - u64 shaders_avail; - bool l2_desired; - bool shaders_desired; - - bool in_reset; - - bool protected_transition_override; - int protected_l2_override; - - bool hwcnt_desired; - bool hwcnt_disabled; - struct work_struct hwcnt_disable_work; -}; - - -/* List of policy IDs */ -enum kbase_pm_policy_id { - KBASE_PM_POLICY_ID_COARSE_DEMAND, -#if !MALI_CUSTOMER_RELEASE - KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, -#endif - KBASE_PM_POLICY_ID_ALWAYS_ON -}; - -typedef u32 kbase_pm_policy_flags; - -#define KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u) - -/** - * struct kbase_pm_policy - Power policy structure. - * - * Each power policy exposes a (static) instance of this structure which - * contains function pointers to the policy's methods. 
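[Editor's aside, not part of the removed file: the fields documented below map onto a concrete static instance in the same way as kbase_pm_coarse_demand_policy_ops earlier in this diff. A minimal sketch follows; the "example_*" names are invented for illustration, the coarse-demand policy ID is reused purely as a placeholder, and the usual kbase declarations (struct kbase_device, kbase_pm_is_active(), CSTD_UNUSED()) are assumed to be in scope.]

static bool example_shaders_needed(struct kbase_device *kbdev)
{
	// Keep shader cores on whenever the PM core reports the GPU as active.
	return kbase_pm_is_active(kbdev);
}

static bool example_get_core_active(struct kbase_device *kbdev)
{
	return kbase_pm_is_active(kbdev);
}

static void example_init(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);  // no private policy state to set up
}

static void example_term(struct kbase_device *kbdev)
{
	CSTD_UNUSED(kbdev);
}

const struct kbase_pm_policy example_policy_ops = {
	"example",                        // name
	example_init,                     // init
	example_term,                     // term
	example_shaders_needed,           // shaders_needed
	example_get_core_active,          // get_core_active
	0u,                               // flags
	KBASE_PM_POLICY_ID_COARSE_DEMAND, // id (a real policy would add its own enum value)
};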
- * - * @name: The name of this policy - * @init: Function called when the policy is selected - * @term: Function called when the policy is unselected - * @shaders_needed: Function called to find out if shader cores are needed - * @get_core_active: Function called to get the current overall GPU power - * state - * @flags: Field indicating flags for this policy - * @id: Field indicating an ID for this policy. This is not - * necessarily the same as its index in the list returned - * by kbase_pm_list_policies(). - * It is used purely for debugging. - */ -struct kbase_pm_policy { - char *name; - - /** - * Function called when the policy is selected - * - * This should initialize the kbdev->pm.pm_policy_data structure. It - * should not attempt to make any changes to hardware state. - * - * It is undefined what state the cores are in when the function is - * called. - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - */ - void (*init)(struct kbase_device *kbdev); - - /** - * Function called when the policy is unselected. - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - */ - void (*term)(struct kbase_device *kbdev); - - /** - * Function called to find out if shader cores are needed - * - * This needs to at least satisfy kbdev->pm.backend.shaders_desired, - * and so must never return false when shaders_desired is true. - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - * - * Return: true if shader cores are needed, false otherwise - */ - bool (*shaders_needed)(struct kbase_device *kbdev); - - /** - * Function called to get the current overall GPU power state - * - * This function must meet or exceed the requirements for power - * indicated by kbase_pm_is_active(). - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - * - * Return: true if the GPU should be powered, false otherwise - */ - bool (*get_core_active)(struct kbase_device *kbdev); - - kbase_pm_policy_flags flags; - enum kbase_pm_policy_id id; -}; - -#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_driver.c deleted file mode 100755 index 2e6599a0a5c3..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_driver.c +++ /dev/null @@ -1,1923 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel Power Management hardware control - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_MALI_CORESTACK -bool corestack_driver_control = true; -#else -bool corestack_driver_control; /* Default value of 0/false */ -#endif -module_param(corestack_driver_control, bool, 0000); -MODULE_PARM_DESC(corestack_driver_control, - "Let the driver power on/off the GPU core stack independently " - "without involving the Power Domain Controller. This should " - "only be enabled on platforms for which integration of the PDC " - "to the Mali GPU is known to be problematic."); -KBASE_EXPORT_TEST_API(corestack_driver_control); - -bool platform_power_down_only = PLATFORM_POWER_DOWN_ONLY; -module_param(platform_power_down_only, bool, 0000); -MODULE_PARM_DESC(platform_power_down_only, - "Disable power down of individual cores."); - -/** - * enum kbasep_pm_action - Actions that can be performed on a core. - * - * This enumeration is private to the file. Its values are set to allow - * core_type_to_reg() function, which decodes this enumeration, to be simpler - * and more efficient. - * - * @ACTION_PRESENT: The cores that are present - * @ACTION_READY: The cores that are ready - * @ACTION_PWRON: Power on the cores specified - * @ACTION_PWROFF: Power off the cores specified - * @ACTION_PWRTRANS: The cores that are transitioning - * @ACTION_PWRACTIVE: The cores that are active - */ -enum kbasep_pm_action { - ACTION_PRESENT = 0, - ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), - ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), - ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), - ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), - ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) -}; - -static u64 kbase_pm_get_state( - struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - enum kbasep_pm_action action); - -static bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) -{ - if (kbdev->pm.backend.protected_transition_override && - kbdev->pm.backend.protected_l2_override) - return true; - - if (kbdev->pm.backend.protected_transition_override && - !kbdev->pm.backend.shaders_desired) - return false; - - return kbdev->pm.backend.l2_desired; -} - -void kbase_pm_protected_override_enable(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->pm.backend.protected_transition_override = true; -} -void kbase_pm_protected_override_disable(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbdev->pm.backend.protected_transition_override = false; -} - -void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (override) { - kbdev->pm.backend.protected_l2_override++; - WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); - } else { - kbdev->pm.backend.protected_l2_override--; - WARN_ON(kbdev->pm.backend.protected_l2_override < 0); - } - - kbase_pm_update_state(kbdev); -} - -/** - * core_type_to_reg - Decode a core type and action to a register. - * - * Given a core type (defined by kbase_pm_core_type) and an action (defined - * by kbasep_pm_action) this function will return the register offset that - * will perform the action on the core type. 
The register returned is the _LO - * register and an offset must be applied to use the _HI register. - * - * @core_type: The type of core - * @action: The type of action - * - * Return: The register offset of the _LO register that performs an action of - * type @action on a core of type @core_type. - */ -static u32 core_type_to_reg(enum kbase_pm_core_type core_type, - enum kbasep_pm_action action) -{ - if (corestack_driver_control) { - if (core_type == KBASE_PM_CORE_STACK) { - switch (action) { - case ACTION_PRESENT: - return STACK_PRESENT_LO; - case ACTION_READY: - return STACK_READY_LO; - case ACTION_PWRON: - return STACK_PWRON_LO; - case ACTION_PWROFF: - return STACK_PWROFF_LO; - case ACTION_PWRTRANS: - return STACK_PWRTRANS_LO; - default: - WARN(1, "Invalid action for core type\n"); - } - } - } - - return (u32)core_type + (u32)action; -} - -#ifdef CONFIG_ARM64 -static void mali_cci_flush_l2(struct kbase_device *kbdev) -{ - const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; - u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - u32 raw; - - /* - * Note that we don't take the cache flush mutex here since - * we expect to be the last user of the L2, all other L2 users - * would have dropped their references, to initiate L2 power - * down, L2 power down being the only valid place for this - * to be called from. - */ - - kbase_reg_write(kbdev, - GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES); - - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); - - /* Wait for cache flush to complete before continuing, exit on - * gpu resets or loop expiry. */ - while (((raw & mask) == 0) && --loops) { - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); - } -} -#endif - -/** - * kbase_pm_invoke - Invokes an action on a core set - * - * This function performs the action given by @action on a set of cores of a - * type given by @core_type. It is a static function used by - * kbase_pm_transition_core_type() - * - * @kbdev: The kbase device structure of the device - * @core_type: The type of core that the action should be performed on - * @cores: A bit mask of cores to perform the action on (low 32 bits) - * @action: The action to perform on the cores - */ -static void kbase_pm_invoke(struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - u64 cores, - enum kbasep_pm_action action) -{ - u32 reg; - u32 lo = cores & 0xFFFFFFFF; - u32 hi = (cores >> 32) & 0xFFFFFFFF; - - /* When 'platform_power_down_only' is enabled, no core type should be - * turned off individually. 
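[Editor's aside, not part of the removed file: a standalone sketch of the additive register encoding that core_type_to_reg() above relies on. The offsets below are placeholders picked only so the arithmetic can be checked; the real values come from the GPU register map.]

#include <stdio.h>

// Core-type values stand for the *_PRESENT_LO offset of each register block,
// and action values are distances measured within the shader block.
#define SHADER_PRESENT_LO 0x100u  // placeholder
#define TILER_PRESENT_LO  0x110u  // placeholder
#define SHADER_PWRON_LO   0x180u  // placeholder
#define TILER_PWRON_LO    0x190u  // placeholder
#define ACTION_PWRON      (SHADER_PWRON_LO - SHADER_PRESENT_LO)

int main(void)
{
	// Because each core type's register block shares the same internal
	// layout, "base + action" resolves to the right register for any
	// core type: 0x110 + (0x180 - 0x100) == 0x190.
	printf("tiler PWRON_LO = 0x%x (expected 0x%x)\n",
	       TILER_PRESENT_LO + ACTION_PWRON, TILER_PWRON_LO);
	return 0;
}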
- */ - KBASE_DEBUG_ASSERT(!(action == ACTION_PWROFF && - platform_power_down_only)); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - reg = core_type_to_reg(core_type, action); - - KBASE_DEBUG_ASSERT(reg); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - if (cores) { - if (action == ACTION_PWRON) - kbase_trace_mali_pm_power_on(core_type, cores); - else if (action == ACTION_PWROFF) - kbase_trace_mali_pm_power_off(core_type, cores); - } -#endif - - if (cores) { - u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); - - if (action == ACTION_PWRON) - state |= cores; - else if (action == ACTION_PWROFF) - state &= ~cores; - KBASE_TLSTREAM_AUX_PM_STATE(core_type, state); - } - - /* Tracing */ - if (cores) { - if (action == ACTION_PWRON) - switch (core_type) { - case KBASE_PM_CORE_SHADER: - KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, - lo); - break; - case KBASE_PM_CORE_TILER: - KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, - NULL, 0u, lo); - break; - case KBASE_PM_CORE_L2: - KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, - 0u, lo); - break; - default: - break; - } - else if (action == ACTION_PWROFF) - switch (core_type) { - case KBASE_PM_CORE_SHADER: - KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, - 0u, lo); - break; - case KBASE_PM_CORE_TILER: - KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, - NULL, 0u, lo); - break; - case KBASE_PM_CORE_L2: - KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, - 0u, lo); - /* disable snoops before L2 is turned off */ - kbase_pm_cache_snoop_disable(kbdev); - break; - default: - break; - } - } - - if (lo != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); - - if (hi != 0) - kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); -} - -/** - * kbase_pm_get_state - Get information about a core set - * - * This function gets information (chosen by @action) about a set of cores of - * a type given by @core_type. It is a static function used by - * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and - * kbase_pm_get_ready_cores(). 
- * - * @kbdev: The kbase device structure of the device - * @core_type: The type of core that the should be queried - * @action: The property of the cores to query - * - * Return: A bit mask specifying the state of the cores - */ -static u64 kbase_pm_get_state(struct kbase_device *kbdev, - enum kbase_pm_core_type core_type, - enum kbasep_pm_action action) -{ - u32 reg; - u32 lo, hi; - - reg = core_type_to_reg(core_type, action); - - KBASE_DEBUG_ASSERT(reg); - - lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); - hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); - - return (((u64) hi) << 32) | ((u64) lo); -} - -/** - * kbase_pm_get_present_cores - Get the cores that are present - * - * @kbdev: Kbase device - * @type: The type of cores to query - * - * Return: Bitmask of the cores that are present - */ -u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - switch (type) { - case KBASE_PM_CORE_L2: - return kbdev->gpu_props.props.raw_props.l2_present; - case KBASE_PM_CORE_SHADER: - return kbdev->gpu_props.props.raw_props.shader_present; - case KBASE_PM_CORE_TILER: - return kbdev->gpu_props.props.raw_props.tiler_present; - case KBASE_PM_CORE_STACK: - return kbdev->gpu_props.props.raw_props.stack_present; - default: - break; - } - KBASE_DEBUG_ASSERT(0); - - return 0; -} - -KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); - -/** - * kbase_pm_get_active_cores - Get the cores that are "active" - * (busy processing work) - * - * @kbdev: Kbase device - * @type: The type of cores to query - * - * Return: Bitmask of cores that are active - */ -u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) -{ - return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); -} - -KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); - -/** - * kbase_pm_get_trans_cores - Get the cores that are transitioning between - * power states - * - * @kbdev: Kbase device - * @type: The type of cores to query - * - * Return: Bitmask of cores that are transitioning - */ -u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) -{ - return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); -} - -KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); - -/** - * kbase_pm_get_ready_cores - Get the cores that are powered on - * - * @kbdev: Kbase device - * @type: The type of cores to query - * - * Return: Bitmask of cores that are ready (powered on) - */ -u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type) -{ - u64 result; - - result = kbase_pm_get_state(kbdev, type, ACTION_READY); - - switch (type) { - case KBASE_PM_CORE_SHADER: - KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u, - (u32) result); - break; - case KBASE_PM_CORE_TILER: - KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u, - (u32) result); - break; - case KBASE_PM_CORE_L2: - KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u, - (u32) result); - break; - default: - break; - } - - return result; -} - -KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); - -static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev) -{ - struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; - u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; - enum kbase_l2_core_state prev_state; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - do { - /* Get current state */ - u64 l2_trans = 
kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_L2); - u64 l2_ready = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2); - u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, - KBASE_PM_CORE_TILER); - u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER); - - /* mask off ready from trans in case transitions finished - * between the register reads - */ - l2_trans &= ~l2_ready; - tiler_trans &= ~tiler_ready; - - prev_state = backend->l2_state; - - switch (backend->l2_state) { - case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { - /* L2 is required, power on. Powering on the - * tiler will also power the first L2 cache. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, - tiler_present, ACTION_PWRON); - - /* If we have more than one L2 cache then we - * must power them on explicitly. - */ - if (l2_present != 1) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present & ~1, - ACTION_PWRON); - backend->l2_state = KBASE_L2_PEND_ON; - } - break; - - case KBASE_L2_PEND_ON: - if (!l2_trans && l2_ready == l2_present && !tiler_trans - && tiler_ready == tiler_present) { - KBASE_TRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, - (u32)tiler_ready); - /* - * Ensure snoops are enabled after L2 is powered - * up. Note that kbase keeps track of the snoop - * state, so safe to repeatedly call. - */ - kbase_pm_cache_snoop_enable(kbdev); - - /* With the L2 enabled, we can now enable - * hardware counters. - */ - backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; - - /* Now that the L2 is on, the shaders can start - * powering on if they're required. The obvious - * way to do this would be to call - * kbase_pm_shaders_update_state() here. - * However, that would make the two state - * machines mutually recursive, as the opposite - * would be needed for powering down. Instead, - * callers of this function should use the - * kbase_pm_update_state() wrapper, which will - * call the shader state machine immediately - * after the L2 (for power up), or - * automatically re-invoke the L2 state machine - * when the shaders power down. - */ - } - break; - - case KBASE_L2_ON_HWCNT_ENABLE: - backend->hwcnt_desired = true; - if (backend->hwcnt_disabled) { - kbase_hwcnt_context_enable( - kbdev->hwcnt_gpu_ctx); - backend->hwcnt_disabled = false; - } - backend->l2_state = KBASE_L2_ON; - break; - - case KBASE_L2_ON: - if (!kbase_pm_is_l2_desired(kbdev)) { - /* Do not power off L2 until the shaders and - * core stacks are off. - */ - if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) - break; - - /* We need to make sure hardware counters are - * disabled before powering down the L2, to - * prevent loss of data. - * - * We waited until after the cores were powered - * down to prevent ping-ponging between hwcnt - * enabled and disabled, which would have - * happened if userspace submitted more work - * while we were trying to power down. - */ - backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; - } - break; - - case KBASE_L2_ON_HWCNT_DISABLE: - /* If the L2 became desired while we were waiting on the - * worker to do the actual hwcnt disable (which might - * happen if some work was submitted immediately after - * the shaders powered off), then we need to early-out - * of this state and re-enable hwcnt. - * - * If we get lucky, the hwcnt disable might not have - * actually started yet, and the logic in the hwcnt - * enable state will prevent the worker from - * performing the disable entirely, preventing loss of - * any hardware counter data. 
- * - * If the hwcnt disable has started, then we'll lose - * a tiny amount of hardware counter data between the - * disable and the re-enable occurring. - * - * This loss of data is preferable to the alternative, - * which is to block the shader cores from doing any - * work until we're sure hwcnt has been re-enabled. - */ - if (kbase_pm_is_l2_desired(kbdev)) { - backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; - break; - } - - /* See if we can get away with disabling hwcnt - * atomically, otherwise kick off a worker. - */ - backend->hwcnt_desired = false; - if (!backend->hwcnt_disabled) { - if (kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)) - backend->hwcnt_disabled = true; - else -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, - &backend->hwcnt_disable_work); -#else - queue_work(system_highpri_wq, - &backend->hwcnt_disable_work); -#endif - } - - if (backend->hwcnt_disabled) - backend->l2_state = KBASE_L2_POWER_DOWN; - break; - - case KBASE_L2_POWER_DOWN: - if (!platform_power_down_only) - /* Powering off the L2 will also power off the - * tiler. - */ - kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, - l2_present, - ACTION_PWROFF); - else - /* If L2 cache is powered then we must flush it - * before we power off the GPU. Normally this - * would have been handled when the L2 was - * powered off. - */ - kbase_gpu_start_cache_clean_nolock( - kbdev); - - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, 0u); - - backend->l2_state = KBASE_L2_PEND_OFF; - break; - - case KBASE_L2_PEND_OFF: - if (!platform_power_down_only) { - /* We only need to check the L2 here - if the L2 - * is off then the tiler is definitely also off. - */ - if (!l2_trans && !l2_ready) - /* L2 is now powered off */ - backend->l2_state = KBASE_L2_OFF; - } else { - if (!kbdev->cache_clean_in_progress) - backend->l2_state = KBASE_L2_OFF; - } - break; - - case KBASE_L2_RESET_WAIT: - if (!backend->in_reset) { - /* Reset complete */ - backend->l2_state = KBASE_L2_OFF; - } - break; - - default: - WARN(1, "Invalid state in l2_state: %d", - backend->l2_state); - } - } while (backend->l2_state != prev_state); - - if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && - backend->l2_state == KBASE_L2_OFF) { - kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; - queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, - &kbdev->pm.backend.gpu_poweroff_wait_work); - } - - if (backend->l2_state == KBASE_L2_ON) - return l2_present; - return 0; -} - -static void shader_poweroff_timer_stop_callback(struct work_struct *data) -{ - unsigned long flags; - struct kbasep_pm_tick_timer_state *stt = container_of(data, - struct kbasep_pm_tick_timer_state, work); - struct kbase_device *kbdev = container_of(stt, struct kbase_device, - pm.backend.shader_tick_timer); - - hrtimer_cancel(&stt->timer); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - stt->cancel_queued = false; - if (kbdev->pm.backend.gpu_powered) - kbase_pm_update_state(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -/** - * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer - * @kbdev: pointer to kbase device - * - * Synchronization between the shader state machine and the timer thread is - * difficult. 
This is because situations may arise where the state machine - * wants to start the timer, but the callback is already running, and has - * already passed the point at which it checks whether it is required, and so - * cancels itself, even though the state machine may have just tried to call - * hrtimer_start. - * - * This cannot be stopped by holding hwaccess_lock in the timer thread, - * because there are still infinitesimally small sections at the start and end - * of the callback where the lock is not held. - * - * Instead, a new state is added to the shader state machine, - * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee - * that when the shaders are switched off, the timer has definitely been - * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the - * timer is started, it is guaranteed that either the timer is already running - * (from an availability change or cancelled timer), or hrtimer_start will - * succeed. It is critical to avoid ending up in - * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could - * hang there forever. - */ -static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) -{ - struct kbasep_pm_tick_timer_state *stt = - &kbdev->pm.backend.shader_tick_timer; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - stt->needed = false; - - if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { - stt->cancel_queued = true; - queue_work(stt->wq, &stt->work); - } -} - -static void kbase_pm_shaders_update_state(struct kbase_device *kbdev) -{ - struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - struct kbasep_pm_tick_timer_state *stt = - &kbdev->pm.backend.shader_tick_timer; - enum kbase_shader_core_state prev_state; - u64 stacks_avail = 0; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (corestack_driver_control) - /* Always power on all the corestacks. Disabling certain - * corestacks when their respective shaders are not in the - * available bitmap is not currently supported. - */ - stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); - - do { - u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); - u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); - u64 stacks_trans = 0; - u64 stacks_ready = 0; - - if (corestack_driver_control) { - stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); - stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); - } - - /* mask off ready from trans in case transitions finished - * between the register reads - */ - shaders_trans &= ~shaders_ready; - stacks_trans &= ~stacks_ready; - - prev_state = backend->shaders_state; - - switch (backend->shaders_state) { - case KBASE_SHADERS_OFF_CORESTACK_OFF: - /* Ignore changes to the shader core availability - * except at certain points where we can handle it, - * i.e. off and SHADERS_ON_CORESTACK_ON. 
- */ - backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); - - if (backend->shaders_desired && backend->l2_state == KBASE_L2_ON) { - if (corestack_driver_control) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, - stacks_avail, ACTION_PWRON); - - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_ON; - } - break; - - case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: - if (!stacks_trans && stacks_ready == stacks_avail) { - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - backend->shaders_avail, ACTION_PWRON); - - backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - - } - break; - - case KBASE_SHADERS_PEND_ON_CORESTACK_ON: - if (!shaders_trans && shaders_ready == backend->shaders_avail) { - KBASE_TRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE, - NULL, NULL, 0u, (u32)shaders_ready); - backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; - } - break; - - case KBASE_SHADERS_ON_CORESTACK_ON: - backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev); - - if (!backend->shaders_desired) { - if (kbdev->pm.backend.protected_transition_override || - !stt->configured_ticks || - WARN_ON(stt->cancel_queued)) { - backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; - } else { - stt->remaining_ticks = stt->configured_ticks; - stt->needed = true; - - /* The shader hysteresis timer is not - * done the obvious way, which would be - * to start an hrtimer when the shader - * power off is requested. Instead, - * use a 'tick' timer, and set the - * remaining number of ticks on a power - * off request. This avoids the - * latency of starting, then - * immediately cancelling an hrtimer - * when the shaders are re-requested - * before the timeout expires. - */ - if (!hrtimer_active(&stt->timer)) - hrtimer_start(&stt->timer, - stt->configured_interval, - HRTIMER_MODE_REL); - - backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; - } - } else if (!platform_power_down_only) { - if (backend->shaders_avail & ~shaders_ready) { - backend->shaders_avail |= shaders_ready; - - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - backend->shaders_avail & ~shaders_ready, - ACTION_PWRON); - backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; - - } - } - break; - - case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: - if (WARN_ON(!hrtimer_active(&stt->timer))) { - stt->remaining_ticks = 0; - backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; - } - - if (backend->shaders_desired) { - stt->remaining_ticks = 0; - backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; - } else if (stt->remaining_ticks == 0) { - backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; - } - break; - - case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: - shader_poweroff_timer_queue_cancel(kbdev); - - if (!platform_power_down_only) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, - shaders_ready, ACTION_PWROFF); - - KBASE_TRACE_ADD(kbdev, - PM_CORES_CHANGE_AVAILABLE, - NULL, NULL, 0u, 0u); - - backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; - break; - - case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: - if ((!shaders_trans && !shaders_ready) || platform_power_down_only) { - if (corestack_driver_control && !platform_power_down_only) - kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, - stacks_avail, ACTION_PWROFF); - - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; - } - break; - - case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: - if ((!stacks_trans && !stacks_ready) || platform_power_down_only) - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; - break; - - case 
KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: - if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; - break; - - case KBASE_SHADERS_RESET_WAIT: - /* Reset complete */ - if (!backend->in_reset) - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; - break; - } - } while (backend->shaders_state != prev_state); -} - -static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) -{ - bool in_desired_state = true; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbase_pm_is_l2_desired(kbdev) && - kbdev->pm.backend.l2_state != KBASE_L2_ON) - in_desired_state = false; - else if (!kbase_pm_is_l2_desired(kbdev) && - kbdev->pm.backend.l2_state != KBASE_L2_OFF) - in_desired_state = false; - - if (kbdev->pm.backend.shaders_desired && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) - in_desired_state = false; - else if (!kbdev->pm.backend.shaders_desired && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) - in_desired_state = false; - - return in_desired_state; -} - -static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) -{ - bool in_desired_state; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return in_desired_state; -} - -static bool kbase_pm_is_in_desired_state_with_l2_powered( - struct kbase_device *kbdev) -{ - bool in_desired_state = false; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbase_pm_is_in_desired_state_nolock(kbdev) && - (kbdev->pm.backend.l2_state == KBASE_L2_ON)) - in_desired_state = true; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return in_desired_state; -} - -static void kbase_pm_trace_power_state(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_L2)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_SHADER)); - kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_TILER)); - if (corestack_driver_control) - kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK, - kbase_pm_get_ready_cores(kbdev, - KBASE_PM_CORE_STACK)); -#endif - - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_L2, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_L2)); - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_SHADER, - kbase_pm_get_ready_cores( - kbdev, KBASE_PM_CORE_SHADER)); - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_TILER, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_TILER)); - - if (corestack_driver_control) - KBASE_TLSTREAM_AUX_PM_STATE( - KBASE_PM_CORE_STACK, - kbase_pm_get_ready_cores( - kbdev, - KBASE_PM_CORE_STACK)); -} - -void kbase_pm_update_state(struct kbase_device *kbdev) -{ - enum kbase_shader_core_state prev_shaders_state = - kbdev->pm.backend.shaders_state; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!kbdev->pm.backend.gpu_powered) - return; /* Do nothing if the GPU is off */ - - kbase_pm_l2_update_state(kbdev); - kbase_pm_shaders_update_state(kbdev); - - /* If the shaders just turned off, re-invoke the L2 state machine, in - * case it was waiting for the shaders to turn off before powering down - * the L2. 
- */ - if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && - kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) - kbase_pm_l2_update_state(kbdev); - - if (kbase_pm_is_in_desired_state_nolock(kbdev)) { - KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, - true, kbdev->pm.backend.shaders_avail); - - kbase_pm_trace_power_state(kbdev); - - KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0); - wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); - } -} - -static enum hrtimer_restart -shader_tick_timer_callback(struct hrtimer *timer) -{ - struct kbasep_pm_tick_timer_state *stt = container_of(timer, - struct kbasep_pm_tick_timer_state, timer); - struct kbase_device *kbdev = container_of(stt, struct kbase_device, - pm.backend.shader_tick_timer); - struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - unsigned long flags; - enum hrtimer_restart restart = HRTIMER_NORESTART; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (stt->remaining_ticks && - backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { - stt->remaining_ticks--; - - /* If the remaining ticks just changed from 1 to 0, invoke the - * PM state machine to power off the shader cores. - */ - if (!stt->remaining_ticks && !backend->shaders_desired) - kbase_pm_update_state(kbdev); - } - - if (stt->needed) { - hrtimer_forward_now(timer, stt->configured_interval); - restart = HRTIMER_RESTART; - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return restart; -} - -int kbase_pm_state_machine_init(struct kbase_device *kbdev) -{ - struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; - - stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); - if (!stt->wq) - return -ENOMEM; - - INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); - - stt->needed = false; - hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - stt->timer.function = shader_tick_timer_callback; - stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); - stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; - - return 0; -} - -void kbase_pm_state_machine_term(struct kbase_device *kbdev) -{ - hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); - destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); -} - -void kbase_pm_reset_start_locked(struct kbase_device *kbdev) -{ - struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - backend->in_reset = true; - backend->l2_state = KBASE_L2_RESET_WAIT; - backend->shaders_state = KBASE_SHADERS_RESET_WAIT; - - /* We're in a reset, so hwcnt will have been synchronously disabled by - * this function's caller as part of the reset process. We therefore - * know that any call to kbase_hwcnt_context_disable_atomic, if - * required to sync the hwcnt refcount with our internal state, is - * guaranteed to succeed. 
- */ - backend->hwcnt_desired = false; - if (!backend->hwcnt_disabled) { - WARN_ON(!kbase_hwcnt_context_disable_atomic( - kbdev->hwcnt_gpu_ctx)); - backend->hwcnt_disabled = true; - } - - shader_poweroff_timer_queue_cancel(kbdev); -} - -void kbase_pm_reset_complete(struct kbase_device *kbdev) -{ - struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - backend->in_reset = false; - kbase_pm_update_state(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has - * aborted due to a fatal signal. If the time spent waiting has exceeded this - * threshold then there is most likely a hardware issue. */ -#define PM_TIMEOUT (5*HZ) /* 5s */ - -static void kbase_pm_timed_out(struct kbase_device *kbdev) -{ - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); - dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, "\tShader=%016llx\n", - kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); - dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_HI)), - kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_READY_LO))); - dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, "\tShader=%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tTiler =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, "\tL2 =%08x%08x\n", - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_HI)), - kbase_reg_read(kbdev, GPU_CONTROL_REG( - L2_PWRTRANS_LO))); - - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); -} - -void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) -{ - unsigned long flags; - unsigned long timeout; - int err; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - timeout = jiffies + PM_TIMEOUT; - - /* Wait for cores */ - err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); - - if (err < 0 && time_after(jiffies, timeout)) - kbase_pm_timed_out(kbdev); -} - -void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) -{ - unsigned long flags; - unsigned long timeout; - int err; - - /* Let the state machine latch the most recent desired state. 
*/ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - timeout = jiffies + PM_TIMEOUT; - - /* Wait for cores */ - err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev)); - - if (err < 0 && time_after(jiffies, timeout)) - kbase_pm_timed_out(kbdev); -} -KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); - -void kbase_pm_enable_interrupts(struct kbase_device *kbdev) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - /* - * Clear all interrupts, - * and unmask them all. - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); -} - -KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); - -void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(NULL != kbdev); - /* - * Mask all interrupts, - * and clear them all. - */ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); -} - -void kbase_pm_disable_interrupts(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_disable_interrupts_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); - -/* - * pmu layout: - * 0x0000: PMU TAG (RO) (0xCAFECAFE) - * 0x0004: PMU VERSION ID (RO) (0x00000000) - * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) - */ -void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) -{ - bool reset_required = is_resume; - unsigned long flags; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - lockdep_assert_held(&kbdev->pm.lock); - - if (kbdev->pm.backend.gpu_powered) { - /* Already turned on */ - if (kbdev->poweroff_pending) - kbase_pm_enable_interrupts(kbdev); - kbdev->poweroff_pending = false; - KBASE_DEBUG_ASSERT(!is_resume); - return; - } - - kbdev->poweroff_pending = false; - - KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); - - if (is_resume && kbdev->pm.backend.callback_power_resume) { - kbdev->pm.backend.callback_power_resume(kbdev); - return; - } else if (kbdev->pm.backend.callback_power_on) { - reset_required = kbdev->pm.backend.callback_power_on(kbdev); - } - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - kbdev->pm.backend.gpu_powered = true; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (reset_required) { - /* GPU state was lost, reset GPU to ensure it is in a - * consistent state */ - kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); - } - - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - 
kbase_ctx_sched_restore_all_as(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - /* Enable the interrupts */ - kbase_pm_enable_interrupts(kbdev); - - /* Turn on the L2 caches */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.l2_desired = true; - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -KBASE_EXPORT_TEST_API(kbase_pm_clock_on); - -bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.lock); - - /* ASSERT that the cores should now be unavailable. No lock needed. */ - WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); - - kbdev->poweroff_pending = true; - - if (!kbdev->pm.backend.gpu_powered) { - /* Already turned off */ - if (is_suspend && kbdev->pm.backend.callback_power_suspend) - kbdev->pm.backend.callback_power_suspend(kbdev); - return true; - } - - KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); - - /* Disable interrupts. This also clears any outstanding interrupts */ - kbase_pm_disable_interrupts(kbdev); - /* Ensure that any IRQ handlers have finished */ - kbase_synchronize_irqs(kbdev); - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (atomic_read(&kbdev->faults_pending)) { - /* Page/bus faults are still being processed. The GPU can not - * be powered off until they have completed */ - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - flags); - return false; - } - - kbase_pm_cache_snoop_disable(kbdev); - - /* The GPU power may be turned off from this point */ - kbdev->pm.backend.gpu_powered = false; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); - - if (is_suspend && kbdev->pm.backend.callback_power_suspend) - kbdev->pm.backend.callback_power_suspend(kbdev); - else if (kbdev->pm.backend.callback_power_off) - kbdev->pm.backend.callback_power_off(kbdev); - return true; -} - -KBASE_EXPORT_TEST_API(kbase_pm_clock_off); - -struct kbasep_reset_timeout_data { - struct hrtimer timer; - bool timed_out; - struct kbase_device *kbdev; -}; - -void kbase_pm_reset_done(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - kbdev->pm.backend.reset_done = true; - wake_up(&kbdev->pm.backend.reset_done_wait); -} - -/** - * kbase_pm_wait_for_reset - Wait for a reset to happen - * - * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. 
- * - * @kbdev: Kbase device - */ -static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->pm.lock); - - wait_event(kbdev->pm.backend.reset_done_wait, - (kbdev->pm.backend.reset_done)); - kbdev->pm.backend.reset_done = false; -} - -KBASE_EXPORT_TEST_API(kbase_pm_reset_done); - -static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) -{ - struct kbasep_reset_timeout_data *rtdata = - container_of(timer, struct kbasep_reset_timeout_data, timer); - - rtdata->timed_out = 1; - - /* Set the wait queue to wake up kbase_pm_init_hw even though the reset - * hasn't completed */ - kbase_pm_reset_done(rtdata->kbdev); - - return HRTIMER_NORESTART; -} - -static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) -{ - struct device_node *np = kbdev->dev->of_node; - u32 jm_values[4]; - const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> - GPU_ID_VERSION_PRODUCT_ID_SHIFT; - const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >> - GPU_ID_VERSION_MAJOR_SHIFT; - - kbdev->hw_quirks_sc = 0; - - /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443. - * and needed due to MIDGLES-3539. See PRLAM-11035 */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) || - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035)) - kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE; - - /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. - */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) - kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; - -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY - /* Enable alternative hardware counter selection if configured. */ - if (!GPU_ID_IS_NEW_FORMAT(prod_id)) - kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; -#endif - - /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) - kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; - - if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { - if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ - kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; - else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ - kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; - } - - if (!kbdev->hw_quirks_sc) - kbdev->hw_quirks_sc = kbase_reg_read(kbdev, - GPU_CONTROL_REG(SHADER_CONFIG)); - - kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TILER_CONFIG)); - - /* Set tiler clock gate override if required */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) - kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; - - /* Limit the GPU bus bandwidth if the platform needs this. 
*/ - kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, - GPU_CONTROL_REG(L2_MMU_CONFIG)); - - - /* Limit read & write ID width for AXI */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) { - kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS); - kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_ARID_LIMIT & 0x7) << - L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT; - - kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES); - kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_AWID_LIMIT & 0x7) << - L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT; - } else { - kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); - kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) << - L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; - - kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); - kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << - L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; - } - - if (kbdev->system_coherency == COHERENCY_ACE) { - /* Allow memory configuration disparity to be ignored, we - * optimize the use of shared memory and thus we expect - * some disparity in the memory configuration */ - kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; - } - - kbdev->hw_quirks_jm = 0; - /* Only for T86x/T88x-based products after r2p0 */ - if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { - - if (of_property_read_u32_array(np, - "jm_config", - &jm_values[0], - ARRAY_SIZE(jm_values))) { - /* Entry not in device tree, use defaults */ - jm_values[0] = 0; - jm_values[1] = 0; - jm_values[2] = 0; - jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; - } - - /* Limit throttle limit to 6 bits*/ - if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) { - dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63)."); - jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT; - } - - /* Aggregate to one integer. */ - kbdev->hw_quirks_jm |= (jm_values[0] ? - JM_TIMESTAMP_OVERRIDE : 0); - kbdev->hw_quirks_jm |= (jm_values[1] ? - JM_CLOCK_GATE_OVERRIDE : 0); - kbdev->hw_quirks_jm |= (jm_values[2] ? - JM_JOB_THROTTLE_ENABLE : 0); - kbdev->hw_quirks_jm |= (jm_values[3] << - JM_JOB_THROTTLE_LIMIT_SHIFT); - - } else if (GPU_ID_IS_NEW_FORMAT(prod_id) && - (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == - GPU_ID2_PRODUCT_TMIX)) { - /* Only for tMIx */ - u32 coherency_features; - - coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES)); - - /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly - * documented for tMIx so force correct value here. - */ - if (coherency_features == - COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { - kbdev->hw_quirks_jm |= - (COHERENCY_ACE_LITE | COHERENCY_ACE) << - JM_FORCE_COHERENCY_FEATURES_SHIFT; - } - } - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) - kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { - int default_idvs_group_size = 0xF; - u32 tmp; - - if (of_property_read_u32(kbdev->dev->of_node, - "idvs-group-size", &tmp)) - tmp = default_idvs_group_size; - - if (tmp > JM_MAX_IDVS_GROUP_SIZE) { - dev_err(kbdev->dev, - "idvs-group-size of %d is too large. 
Maximum value is %d", - tmp, JM_MAX_IDVS_GROUP_SIZE); - tmp = default_idvs_group_size; - } - - kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT; - } - - if (!kbdev->hw_quirks_jm) - kbdev->hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JM_CONFIG)); - -#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) - if (corestack_driver_control) - kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; -} - -static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) -{ - kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), - kbdev->hw_quirks_sc); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), - kbdev->hw_quirks_tiler); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), - kbdev->hw_quirks_mmu); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_jm); - -} - -void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) -{ - if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && - !kbdev->cci_snoop_enabled) { -#ifdef CONFIG_ARM64 - if (kbdev->snoop_enable_smc != 0) - kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); -#endif /* CONFIG_ARM64 */ - dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); - kbdev->cci_snoop_enabled = true; - } -} - -void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) -{ - if (kbdev->cci_snoop_enabled) { -#ifdef CONFIG_ARM64 - if (kbdev->snoop_disable_smc != 0) { - mali_cci_flush_l2(kbdev); - kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); - } -#endif /* CONFIG_ARM64 */ - dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); - kbdev->cci_snoop_enabled = false; - } -} - -static int kbase_pm_do_reset(struct kbase_device *kbdev) -{ - struct kbasep_reset_timeout_data rtdata; - - KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); - - KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SOFT_RESET); - - /* Unmask the reset complete interrupt only */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); - - /* Initialize a structure for tracking the status of the reset */ - rtdata.kbdev = kbdev; - rtdata.timed_out = 0; - - /* Create a timer to use as a timeout on the reset */ - hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - rtdata.timer.function = kbasep_reset_timeout; - - hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); - - /* Wait for the RESET_COMPLETED interrupt to be raised */ - kbase_pm_wait_for_reset(kbdev); - - if (rtdata.timed_out == 0) { - /* GPU has been reset */ - hrtimer_cancel(&rtdata.timer); - destroy_hrtimer_on_stack(&rtdata.timer); - return 0; - } - - /* No interrupt has been received - check if the RAWSTAT register says - * the reset has completed */ - if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - RESET_COMPLETED) { - /* The interrupt is set in the RAWSTAT; this suggests that the - * interrupts are not getting to the CPU */ - dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); - /* If interrupts aren't working we can't continue. 
*/ - destroy_hrtimer_on_stack(&rtdata.timer); - return -EINVAL; - } - - /* The GPU doesn't seem to be responding to the reset so try a hard - * reset */ - dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", - RESET_TIMEOUT); - KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_HARD_RESET); - - /* Restart the timer to wait for the hard reset to complete */ - rtdata.timed_out = 0; - - hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); - - /* Wait for the RESET_COMPLETED interrupt to be raised */ - kbase_pm_wait_for_reset(kbdev); - - if (rtdata.timed_out == 0) { - /* GPU has been reset */ - hrtimer_cancel(&rtdata.timer); - destroy_hrtimer_on_stack(&rtdata.timer); - return 0; - } - - destroy_hrtimer_on_stack(&rtdata.timer); - - dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", - RESET_TIMEOUT); - - return -EINVAL; -} - -static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) -{ - struct kbase_device *kbdev = pdev->data; - - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SET_PROTECTED_MODE); - return 0; -} - -static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) -{ - struct kbase_device *kbdev = pdev->data; - - lockdep_assert_held(&kbdev->pm.lock); - - return kbase_pm_do_reset(kbdev); -} - -struct protected_mode_ops kbase_native_protected_ops = { - .protected_mode_enable = kbasep_protected_mode_enable, - .protected_mode_disable = kbasep_protected_mode_disable -}; - -int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) -{ - unsigned long irq_flags; - int err; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.lock); - - /* Ensure the clock is on before attempting to access the hardware */ - if (!kbdev->pm.backend.gpu_powered) { - if (kbdev->pm.backend.callback_power_on) - kbdev->pm.backend.callback_power_on(kbdev); - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - kbdev->pm.backend.gpu_powered = true; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - } - - /* Ensure interrupts are off to begin with, this also clears any - * outstanding interrupts */ - kbase_pm_disable_interrupts(kbdev); - /* Ensure cache snoops are disabled before reset. 
*/ - kbase_pm_cache_snoop_disable(kbdev); - /* Prepare for the soft-reset */ - kbdev->pm.backend.reset_done = false; - - /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); - - /* Soft reset the GPU */ - if (kbdev->protected_mode_support) - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); - else - err = kbase_pm_do_reset(kbdev); - - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - kbdev->protected_mode = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); - - if (err) - goto exit; - - if (flags & PM_HW_ISSUES_DETECT) - kbase_pm_hw_issues_detect(kbdev); - - kbase_pm_hw_issues_apply(kbdev); - kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); - - /* Sanity check protected mode was left after reset */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - u32 gpu_status = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS)); - - WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); - } - - /* If cycle counter was in use re-enable it, enable_irqs will only be - * false when called from kbase_pm_powerup */ - if (kbdev->pm.backend.gpu_cycle_counter_requests && - (flags & PM_ENABLE_IRQS)) { - kbase_pm_enable_interrupts(kbdev); - - /* Re-enable the counters if we need to */ - spin_lock_irqsave( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); - if (kbdev->pm.backend.gpu_cycle_counter_requests) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - irq_flags); - - kbase_pm_disable_interrupts(kbdev); - } - - if (flags & PM_ENABLE_IRQS) - kbase_pm_enable_interrupts(kbdev); - -exit: - /* Re-enable GPU hardware counters if we're resetting from protected - * mode. - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - kbdev->protected_mode_hwcnt_desired = true; - if (kbdev->protected_mode_hwcnt_disabled) { - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - kbdev->protected_mode_hwcnt_disabled = false; - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); - return err; -} - -/** - * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters - * - * Increase the count of cycle counter users and turn the cycle counters on if - * they were previously off - * - * This function is designed to be called by - * kbase_pm_request_gpu_cycle_counter() or - * kbase_pm_request_gpu_cycle_counter_l2_is_on() only - * - * When this function is called the l2 cache must be on - i.e., the GPU must be - * on. 
- * - * @kbdev: The kbase device structure of the device - */ -static void -kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); - - ++kbdev->pm.backend.gpu_cycle_counter_requests; - - if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_START); - - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); -} - -void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < - INT_MAX); - - kbase_pm_request_gpu_cycle_counter_do_request(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); - -void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < - INT_MAX); - - kbase_pm_request_gpu_cycle_counter_do_request(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); - -void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); - - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); - - --kbdev->pm.backend.gpu_cycle_counter_requests; - - if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CYCLE_COUNT_STOP); - - spin_unlock_irqrestore( - &kbdev->pm.backend.gpu_cycle_counter_requests_lock, - flags); -} - -void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_internal.h deleted file mode 100755 index e88b3a836631..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_internal.h +++ /dev/null @@ -1,624 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Power management API definitions used internally by GPU backend - */ - -#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ -#define _KBASE_BACKEND_PM_INTERNAL_H_ - -#include - -#include "mali_kbase_pm_ca.h" -#include "mali_kbase_pm_policy.h" - - -/** - * kbase_pm_dev_idle - The GPU is idle. - * - * The OS may choose to turn off idle devices - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_dev_idle(struct kbase_device *kbdev); - -/** - * kbase_pm_dev_activate - The GPU is active. - * - * The OS should avoid opportunistically turning off the GPU while it is active - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_dev_activate(struct kbase_device *kbdev); - -/** - * kbase_pm_get_present_cores - Get details of the cores that are present in - * the device. - * - * This function can be called by the active power policy to return a bitmask of - * the cores (of a specified type) present in the GPU device and also a count of - * the number of cores. - * - * @kbdev: The kbase device structure for the device (must be a valid - * pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * - * Return: The bit mask of cores present - */ -u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); - -/** - * kbase_pm_get_active_cores - Get details of the cores that are currently - * active in the device. - * - * This function can be called by the active power policy to return a bitmask of - * the cores (of a specified type) that are actively processing work (i.e. - * turned on *and* busy). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * - * Return: The bit mask of active cores - */ -u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); - -/** - * kbase_pm_get_trans_cores - Get details of the cores that are currently - * transitioning between power states. - * - * This function can be called by the active power policy to return a bitmask of - * the cores (of a specified type) that are currently transitioning between - * power states. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * - * Return: The bit mask of transitioning cores - */ -u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); - -/** - * kbase_pm_get_ready_cores - Get details of the cores that are currently - * powered and ready for jobs. - * - * This function can be called by the active power policy to return a bitmask of - * the cores (of a specified type) that are powered and ready for jobs (they may - * or may not be currently executing jobs). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @type: The type of core (see the enum kbase_pm_core_type enumeration) - * - * Return: The bit mask of ready cores - */ -u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, - enum kbase_pm_core_type type); - -/** - * kbase_pm_clock_on - Turn the clock for the device on, and enable device - * interrupts. - * - * This function can be used by a power policy to turn the clock for the GPU on. 
- * It should be modified during integration to perform the necessary actions to - * ensure that the GPU is fully powered and clocked. - * - * @kbdev: The kbase device structure for the device (must be a valid - * pointer) - * @is_resume: true if clock on due to resume after suspend, false otherwise - */ -void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); - -/** - * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the - * device off. - * - * This function can be used by a power policy to turn the clock for the GPU - * off. It should be modified during integration to perform the necessary - * actions to turn the clock off (if this is possible in the integration). - * - * @kbdev: The kbase device structure for the device (must be a valid - * pointer) - * @is_suspend: true if clock off due to suspend, false otherwise - * - * Return: true if clock was turned off, or - * false if clock can not be turned off due to pending page/bus fault - * workers. Caller must flush MMU workqueues and retry - */ -bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); - -/** - * kbase_pm_enable_interrupts - Enable interrupts on the device. - * - * Interrupts are also enabled after a call to kbase_pm_clock_on(). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_enable_interrupts(struct kbase_device *kbdev); - -/** - * kbase_pm_disable_interrupts - Disable interrupts on the device. - * - * This prevents delivery of Power Management interrupts to the CPU so that - * kbase_pm_update_state() will not be called from the IRQ handler - * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. - * - * Interrupts are also disabled after a call to kbase_pm_clock_off(). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_disable_interrupts(struct kbase_device *kbdev); - -/** - * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() - * that does not take the hwaccess_lock - * - * Caller must hold the hwaccess_lock. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); - -/** - * kbase_pm_init_hw - Initialize the hardware. - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @flags: Flags specifying the type of PM init - * - * This function checks the GPU ID register to ensure that the GPU is supported - * by the driver and performs a reset on the device so that it is in a known - * state before the device is used. - * - * Return: 0 if the device is supported and successfully reset. - */ -int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); - -/** - * kbase_pm_reset_done - The GPU has been reset successfully. - * - * This function must be called by the GPU interrupt handler when the - * RESET_COMPLETED bit is set. It signals to the power management initialization - * code that the GPU has been successfully reset. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_reset_done(struct kbase_device *kbdev); - -/** - * kbase_pm_wait_for_desired_state - Wait for the desired power state to be - * reached - * - * Wait for the L2 and shader power state machines to reach the states - * corresponding to the values of 'l2_desired' and 'shaders_desired'. 
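/*
 * A minimal usage sketch of the wait API documented above (not part of the
 * original file): it assumes kbdev->pm.lock is already held by the caller, as
 * kbase_pm_init_hw() requires, and that PM_ENABLE_IRQS is the desired flag
 * set; example_reset_and_settle() is a hypothetical helper name.
 */
static int example_reset_and_settle(struct kbase_device *kbdev)
{
	int err = kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);

	if (err)
		return err;

	/* Block until the L2/shader state machines reach the states implied
	 * by l2_desired/shaders_desired, e.g. so cores are READY again after
	 * the reset. Must be called without hwaccess_lock held. */
	kbase_pm_wait_for_desired_state(kbdev);

	return 0;
}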
- * - * The usual use-case for this is to ensure cores are 'READY' after performing - * a GPU Reset. - * - * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, - * because this function will take that lock itself. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); - -/** - * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on - * - * Wait for the L2 to be powered on, and for the L2 and shader state machines to - * stabilise by reaching the states corresponding to the values of 'l2_desired' - * and 'shaders_desired'. - * - * kbdev->pm.active_count must be non-zero when calling this function. - * - * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, - * because this function will take that lock itself. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); - -/** - * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() - * where the caller must hold - * kbase_device.pm.power_change_lock - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); - -/** - * kbase_pm_update_state - Update the L2 and shader power state machines - * @kbdev: Device pointer - */ -void kbase_pm_update_state(struct kbase_device *kbdev); - -/** - * kbase_pm_state_machine_init - Initialize the state machines, primarily the - * shader poweroff timer - * @kbdev: Device pointer - */ -int kbase_pm_state_machine_init(struct kbase_device *kbdev); - -/** - * kbase_pm_state_machine_term - Clean up the PM state machines' data - * @kbdev: Device pointer - */ -void kbase_pm_state_machine_term(struct kbase_device *kbdev); - -/** - * kbase_pm_update_cores_state - Update the desired state of shader cores from - * the Power Policy, and begin any power - * transitions. - * - * This function will update the desired_xx_state members of - * struct kbase_pm_device_data by calling into the current Power Policy. It will - * then begin power transitions to make the hardware achieve the desired shader - * core state. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_update_cores_state(struct kbase_device *kbdev); - -/** - * kbasep_pm_metrics_init - Initialize the metrics gathering framework. - * - * This must be called before other metric gathering APIs are called. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Return: 0 on success, error code on error - */ -int kbasep_pm_metrics_init(struct kbase_device *kbdev); - -/** - * kbasep_pm_metrics_term - Terminate the metrics gathering framework. - * - * This must be called when metric gathering is no longer required. It is an - * error to call any metrics gathering function (other than - * kbasep_pm_metrics_init()) after calling this function. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbasep_pm_metrics_term(struct kbase_device *kbdev); - -/** - * kbase_pm_report_vsync - Function to be called by the frame buffer driver to - * update the vsync metric. - * - * This function should be called by the frame buffer driver to update whether - * the system is hitting the vsync target or not. 
buffer_updated should be true - * if the vsync corresponded with a new frame being displayed, otherwise it - * should be false. This function does not need to be called every vsync, but - * only when the value of @buffer_updated differs from a previous call. - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - * @buffer_updated: True if the buffer has been updated on this VSync, - * false otherwise - */ -void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); - -/** - * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change - * the clock speed of the GPU. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * This function should be called regularly by the DVFS system to check whether - * the clock speed of the GPU needs updating. - */ -void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); - -/** - * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is - * needed - * - * If the caller is the first caller then the GPU cycle counters will be enabled - * along with the l2 cache - * - * The GPU must be powered when calling this function (i.e. - * kbase_pm_context_active() must have been called). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); - -/** - * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is - * needed (l2 cache already on) - * - * This is a version of the above function - * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the - * l2 cache is known to be on and assured to be on until the subsequent call of - * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does - * not sleep and can be called from atomic functions. - * - * The GPU must be powered when calling this function (i.e. - * kbase_pm_context_active() must have been called) and the l2 cache must be - * powered on. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); - -/** - * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no - * longer in use - * - * If the caller is the last caller then the GPU cycle counters will be - * disabled. A request must have been made before a call to this. - * - * Caller must not hold the hwaccess_lock, as it will be taken in this function. - * If the caller is already holding this lock then - * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); - -/** - * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() - * that does not take hwaccess_lock - * - * Caller must hold the hwaccess_lock. 
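/*
 * A minimal sketch of the request/release pairing documented above (not part
 * of the original file): it assumes the GPU has already been powered via
 * kbase_pm_context_active(), mirroring the pattern used by
 * kbase_backend_get_gpu_time() later in this patch;
 * example_sample_cycle_count() is a hypothetical helper name.
 */
static u32 example_sample_cycle_count(struct kbase_device *kbdev)
{
	u32 lo;

	/* Counters are started when the first request arrives */
	kbase_pm_request_gpu_cycle_counter(kbdev);

	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO));

	/* Counters are stopped again when the last user releases them */
	kbase_pm_release_gpu_cycle_counter(kbdev);

	return lo;
}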
- * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); - -/** - * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to - * complete - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); - -/** - * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device - * - * Setup the power management callbacks and initialize/enable the runtime-pm - * for the Mali GPU platform device, using the callback function. This must be - * called before the kbase_pm_register_access_enable() function. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -int kbase_pm_runtime_init(struct kbase_device *kbdev); - -/** - * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_runtime_term(struct kbase_device *kbdev); - -/** - * kbase_pm_register_access_enable - Enable access to GPU registers - * - * Enables access to the GPU registers before power management has powered up - * the GPU with kbase_pm_powerup(). - * - * This results in the power management callbacks provided in the driver - * configuration to get called to turn on power and/or clocks to the GPU. See - * kbase_pm_callback_conf. - * - * This should only be used before power management is powered up with - * kbase_pm_powerup() - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_register_access_enable(struct kbase_device *kbdev); - -/** - * kbase_pm_register_access_disable - Disable early register access - * - * Disables access to the GPU registers enabled earlier by a call to - * kbase_pm_register_access_enable(). - * - * This results in the power management callbacks provided in the driver - * configuration to get called to turn off power and/or clocks to the GPU. See - * kbase_pm_callback_conf - * - * This should only be used before power management is powered up with - * kbase_pm_powerup() - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_register_access_disable(struct kbase_device *kbdev); - -/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline - * function */ - -/** - * kbase_pm_metrics_is_active - Check if the power management metrics - * collection is active. - * - * Note that this returns if the power management metrics collection was - * active at the time of calling, it is possible that after the call the metrics - * collection enable may have changed state. - * - * The caller must handle the consequence that the state may have changed. - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * Return: true if metrics collection was active else false. - */ -bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); - -/** - * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. - * - * @kbdev: The kbase device structure for the device (must be a valid - * pointer) - * @is_resume: true if power on due to resume after suspend, - * false otherwise - */ -void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); - -/** - * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been - * requested. 
- * - * @kbdev: The kbase device structure for the device (must be a valid - * pointer) - * @is_suspend: true if power off due to suspend, - * false otherwise - */ -void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend); - -#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) -void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, - struct kbasep_pm_metrics *last, - struct kbasep_pm_metrics *diff); -#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ - -#ifdef CONFIG_MALI_MIDGARD_DVFS - -/** - * kbase_platform_dvfs_event - Report utilisation to DVFS code - * - * Function provided by platform specific code when DVFS is enabled to allow - * the power management metrics system to report utilisation. - * - * @kbdev: The kbase device structure for the device (must be a - * valid pointer) - * @utilisation: The current calculated utilisation by the metrics system. - * @util_gl_share: The current calculated gl share of utilisation. - * @util_cl_share: The current calculated cl share of utilisation per core - * group. - * Return: Returns 0 on failure and non zero on success. - */ - -int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, - u32 util_gl_share, u32 util_cl_share[2]); -#endif - -void kbase_pm_power_changed(struct kbase_device *kbdev); - -/** - * kbase_pm_metrics_update - Inform the metrics system that an atom is either - * about to be run or has just completed. - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @now: Pointer to the timestamp of the change, or NULL to use current time - * - * Caller must hold hwaccess_lock - */ -void kbase_pm_metrics_update(struct kbase_device *kbdev, - ktime_t *now); - -/** - * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU - * If the GPU does not have coherency this is a no-op - * @kbdev: Device pointer - * - * This function should be called after L2 power up. - */ - -void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); - -/** - * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU - * If the GPU does not have coherency this is a no-op - * @kbdev: Device pointer - * - * This function should be called before L2 power off. - */ -void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); - -#ifdef CONFIG_MALI_DEVFREQ -/** - * kbase_devfreq_set_core_mask - Set devfreq core mask - * @kbdev: Device pointer - * @core_mask: New core mask - * - * This function is used by devfreq to change the available core mask as - * required by Dynamic Core Scaling. - */ -void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); -#endif - -/** - * kbase_pm_reset_start_locked - Signal that GPU reset has started - * @kbdev: Device pointer - * - * Normal power management operation will be suspended until the reset has - * completed. - * - * Caller must hold hwaccess_lock. - */ -void kbase_pm_reset_start_locked(struct kbase_device *kbdev); - -/** - * kbase_pm_reset_complete - Signal that GPU reset has completed - * @kbdev: Device pointer - * - * Normal power management operation will be resumed. The power manager will - * re-evaluate what cores are needed and power on or off as required. 
- */ -void kbase_pm_reset_complete(struct kbase_device *kbdev); - -/** - * kbase_pm_protected_override_enable - Enable the protected mode override - * @kbdev: Device pointer - * - * When the protected mode override is enabled, all shader cores are requested - * to power down, and the L2 power state can be controlled by - * kbase_pm_protected_l2_override(). - * - * Caller must hold hwaccess_lock. - */ -void kbase_pm_protected_override_enable(struct kbase_device *kbdev); - -/** - * kbase_pm_protected_override_disable - Disable the protected mode override - * @kbdev: Device pointer - * - * Caller must hold hwaccess_lock. - */ -void kbase_pm_protected_override_disable(struct kbase_device *kbdev); - -/** - * kbase_pm_protected_l2_override - Control the protected mode L2 override - * @kbdev: Device pointer - * @override: true to enable the override, false to disable - * - * When the driver is transitioning in or out of protected mode, the L2 cache is - * forced to power off. This can be overridden to force the L2 cache to power - * on. This is required to change coherency settings on some GPUs. - */ -void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); - -/* If true, the driver should explicitly control corestack power management, - * instead of relying on the Power Domain Controller. - */ -extern bool corestack_driver_control; - -/* If true, disable powering-down of individual cores, and just power-down at - * the top-level using platform-specific code. - * If false, use the expected behaviour of controlling the individual cores - * from within the driver. - */ -extern bool platform_power_down_only; - -#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_metrics.c deleted file mode 100755 index 6b9b6862cc9b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_metrics.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Metrics for power management - */ - -#include -#include -#include -#include -#include - -/* When VSync is being hit aim for utilisation between 70-90% */ -#define KBASE_PM_VSYNC_MIN_UTILISATION 70 -#define KBASE_PM_VSYNC_MAX_UTILISATION 90 -/* Otherwise aim for 10-40% */ -#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 -#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 - -/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns - * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly - * under 11s. 
Exceeding this will cause overflow */ -#define KBASE_PM_TIME_SHIFT 8 - -#ifdef CONFIG_MALI_MIDGARD_DVFS -static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) -{ - unsigned long flags; - struct kbasep_pm_metrics_state *metrics; - - KBASE_DEBUG_ASSERT(timer != NULL); - - metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); - kbase_pm_get_dvfs_action(metrics->kbdev); - - spin_lock_irqsave(&metrics->lock, flags); - - if (metrics->timer_active) - hrtimer_start(timer, - HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); - - spin_unlock_irqrestore(&metrics->lock, flags); - - return HRTIMER_NORESTART; -} -#endif /* CONFIG_MALI_MIDGARD_DVFS */ - -int kbasep_pm_metrics_init(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - kbdev->pm.backend.metrics.kbdev = kbdev; - - kbdev->pm.backend.metrics.time_period_start = ktime_get(); - kbdev->pm.backend.metrics.gpu_active = false; - kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; - - kbdev->pm.backend.metrics.values.time_busy = 0; - kbdev->pm.backend.metrics.values.time_idle = 0; - kbdev->pm.backend.metrics.values.busy_cl[0] = 0; - kbdev->pm.backend.metrics.values.busy_cl[1] = 0; - kbdev->pm.backend.metrics.values.busy_gl = 0; - - spin_lock_init(&kbdev->pm.backend.metrics.lock); - -#ifdef CONFIG_MALI_MIDGARD_DVFS - kbdev->pm.backend.metrics.timer_active = true; - hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); - kbdev->pm.backend.metrics.timer.function = dvfs_callback; - - hrtimer_start(&kbdev->pm.backend.metrics.timer, - HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), - HRTIMER_MODE_REL); -#endif /* CONFIG_MALI_MIDGARD_DVFS */ - - return 0; -} - -KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); - -void kbasep_pm_metrics_term(struct kbase_device *kbdev) -{ -#ifdef CONFIG_MALI_MIDGARD_DVFS - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbdev->pm.backend.metrics.timer_active = false; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - hrtimer_cancel(&kbdev->pm.backend.metrics.timer); -#endif /* CONFIG_MALI_MIDGARD_DVFS */ -} - -KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); - -/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this - * function - */ -static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, - ktime_t now) -{ - ktime_t diff; - - lockdep_assert_held(&kbdev->pm.backend.metrics.lock); - - diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); - if (ktime_to_ns(diff) < 0) - return; - - if (kbdev->pm.backend.metrics.gpu_active) { - u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); - - kbdev->pm.backend.metrics.values.time_busy += ns_time; - if (kbdev->pm.backend.metrics.active_cl_ctx[0]) - kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time; - if (kbdev->pm.backend.metrics.active_cl_ctx[1]) - kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time; - if (kbdev->pm.backend.metrics.active_gl_ctx[0]) - kbdev->pm.backend.metrics.values.busy_gl += ns_time; - if (kbdev->pm.backend.metrics.active_gl_ctx[1]) - kbdev->pm.backend.metrics.values.busy_gl += ns_time; - } else { - kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff) - >> KBASE_PM_TIME_SHIFT); - } - - kbdev->pm.backend.metrics.time_period_start = now; -} - -#if 
defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) -void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, - struct kbasep_pm_metrics *last, - struct kbasep_pm_metrics *diff) -{ - struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); - - memset(diff, 0, sizeof(*diff)); - diff->time_busy = cur->time_busy - last->time_busy; - diff->time_idle = cur->time_idle - last->time_idle; - diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; - diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; - diff->busy_gl = cur->busy_gl - last->busy_gl; - - *last = *cur; - - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); -} -KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); -#endif - -#ifdef CONFIG_MALI_MIDGARD_DVFS -void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) -{ - int utilisation, util_gl_share; - int util_cl_share[2]; - int busy; - struct kbasep_pm_metrics *diff; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - diff = &kbdev->pm.backend.metrics.dvfs_diff; - - kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); - - utilisation = (100 * diff->time_busy) / - max(diff->time_busy + diff->time_idle, 1u); - - busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); - util_gl_share = (100 * diff->busy_gl) / busy; - util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; - util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; - - kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); -} - -bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) -{ - bool isactive; - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - isactive = kbdev->pm.backend.metrics.timer_active; - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - - return isactive; -} -KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); - -#endif /* CONFIG_MALI_MIDGARD_DVFS */ - -/** - * kbase_pm_metrics_active_calc - Update PM active counts based on currently - * running atoms - * @kbdev: Device pointer - * - * The caller must hold kbdev->pm.backend.metrics.lock - */ -static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) -{ - int js; - - lockdep_assert_held(&kbdev->pm.backend.metrics.lock); - - kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; - kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; - kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; - kbdev->pm.backend.metrics.gpu_active = false; - - for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - - /* Head atom may have just completed, so if it isn't running - * then try the next atom */ - if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) - katom = kbase_gpu_inspect(kbdev, js, 1); - - if (katom && katom->gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { - if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - int device_nr = (katom->core_req & - BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) - ? katom->device_nr : 0; - if (!WARN_ON(device_nr >= 2)) - kbdev->pm.backend.metrics. - active_cl_ctx[device_nr] = 1; - } else { - /* Slot 2 should not be running non-compute - * atoms */ - if (!WARN_ON(js >= 2)) - kbdev->pm.backend.metrics. 
- active_gl_ctx[js] = 1; - } - kbdev->pm.backend.metrics.gpu_active = true; - } - } -} - -/* called when job is submitted to or removed from a GPU slot */ -void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) -{ - unsigned long flags; - ktime_t now; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - - if (!timestamp) { - now = ktime_get(); - timestamp = &now; - } - - /* Track how long CL and/or GL jobs have been busy for */ - kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); - - kbase_pm_metrics_active_calc(kbdev); - - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); -} diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_policy.c deleted file mode 100755 index 2f06a0a4b247..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_policy.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Power policy API implementations - */ - -#include -#include -#include -#include -#include - -static const struct kbase_pm_policy *const all_policy_list[] = { -#ifdef CONFIG_MALI_NO_MALI - &kbase_pm_always_on_policy_ops, - &kbase_pm_coarse_demand_policy_ops, -#if !MALI_CUSTOMER_RELEASE - &kbase_pm_always_on_demand_policy_ops, -#endif -#else /* CONFIG_MALI_NO_MALI */ - &kbase_pm_coarse_demand_policy_ops, -#if !MALI_CUSTOMER_RELEASE - &kbase_pm_always_on_demand_policy_ops, -#endif - &kbase_pm_always_on_policy_ops -#endif /* CONFIG_MALI_NO_MALI */ -}; - -/* A filtered list of policies available in the system, calculated by filtering - * all_policy_list based on the flags provided by each policy. 
- */ -static const struct kbase_pm_policy *enabled_policy_list[ARRAY_SIZE(all_policy_list)]; -static size_t enabled_policy_count; - -static void generate_filtered_policy_list(void) -{ - size_t i; - - for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) { - const struct kbase_pm_policy *pol = all_policy_list[i]; - - if (platform_power_down_only && - (pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY)) - continue; - - enabled_policy_list[enabled_policy_count++] = pol; - } -} - -int kbase_pm_policy_init(struct kbase_device *kbdev) -{ - generate_filtered_policy_list(); - if (enabled_policy_count == 0) - return -EINVAL; - - kbdev->pm.backend.pm_current_policy = enabled_policy_list[0]; - kbdev->pm.backend.pm_current_policy->init(kbdev); - - return 0; -} - -void kbase_pm_policy_term(struct kbase_device *kbdev) -{ - kbdev->pm.backend.pm_current_policy->term(kbdev); -} - -void kbase_pm_update_active(struct kbase_device *kbdev) -{ - struct kbase_pm_device_data *pm = &kbdev->pm; - struct kbase_pm_backend_data *backend = &pm->backend; - unsigned long flags; - bool active; - - lockdep_assert_held(&pm->lock); - - /* pm_current_policy will never be NULL while pm.lock is held */ - KBASE_DEBUG_ASSERT(backend->pm_current_policy); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - active = backend->pm_current_policy->get_core_active(kbdev); - WARN((kbase_pm_is_active(kbdev) && !active), - "GPU is active but policy '%s' is indicating that it can be powered off", - kbdev->pm.backend.pm_current_policy->name); - - if (active) { - /* Power on the GPU and any cores requested by the policy */ - if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && - pm->backend.poweroff_wait_in_progress) { - KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); - pm->backend.poweron_required = true; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } else { - /* Cancel the invocation of - * kbase_pm_gpu_poweroff_wait_wq() from the L2 state - * machine. This is safe - if - * invoke_poweroff_wait_wq_when_l2_off is true, then - * the poweroff work hasn't even been queued yet, - * meaning we can go straight to powering on. 
- */ - pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; - pm->backend.poweroff_wait_in_progress = false; - pm->backend.l2_desired = true; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_pm_do_poweron(kbdev, false); - } - } else { - /* It is an error for the power policy to power off the GPU - * when there are contexts active */ - KBASE_DEBUG_ASSERT(pm->active_count == 0); - - /* Request power off */ - if (pm->backend.gpu_powered) { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Power off the GPU immediately */ - kbase_pm_do_poweroff(kbdev, false); - } else { - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - } -} - -void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) -{ - bool shaders_desired; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbdev->pm.backend.pm_current_policy == NULL) - return; - if (kbdev->pm.backend.poweroff_wait_in_progress) - return; - - if (kbdev->pm.backend.protected_transition_override) - /* We are trying to change in/out of protected mode - force all - * cores off so that the L2 powers down */ - shaders_desired = false; - else - shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); - - if (kbdev->pm.backend.shaders_desired != shaders_desired) { - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, - (u32)kbdev->pm.backend.shaders_desired); - - kbdev->pm.backend.shaders_desired = shaders_desired; - kbase_pm_update_state(kbdev); - } -} - -void kbase_pm_update_cores_state(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbase_pm_update_cores_state_nolock(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) -{ - WARN_ON(enabled_policy_count == 0); - if (list) - *list = enabled_policy_list; - - return enabled_policy_count; -} - -KBASE_EXPORT_TEST_API(kbase_pm_list_policies); - -const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - return kbdev->pm.backend.pm_current_policy; -} - -KBASE_EXPORT_TEST_API(kbase_pm_get_policy); - -void kbase_pm_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_policy *new_policy) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - const struct kbase_pm_policy *old_policy; - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(new_policy != NULL); - - KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); - - /* During a policy change we pretend the GPU is active */ - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread */ - kbase_pm_context_active(kbdev); - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - /* Remove the policy to prevent IRQ handlers from working on it */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - old_policy = kbdev->pm.backend.pm_current_policy; - kbdev->pm.backend.pm_current_policy = NULL; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, - old_policy->id); - if (old_policy->term) - old_policy->term(kbdev); - - KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, - new_policy->id); - if (new_policy->init) - new_policy->init(kbdev); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.pm_current_policy = new_policy; - 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* If any core power state changes were previously attempted, but - * couldn't be made because the policy was changing (current_policy was - * NULL), then re-try them here. */ - kbase_pm_update_active(kbdev); - kbase_pm_update_cores_state(kbdev); - - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - - /* Now the policy change is finished, we release our fake context active - * reference */ - kbase_pm_context_idle(kbdev); -} - -KBASE_EXPORT_TEST_API(kbase_pm_set_policy); diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_policy.h deleted file mode 100755 index 28d258fce359..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_pm_policy.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Power policy API definitions - */ - -#ifndef _KBASE_PM_POLICY_H_ -#define _KBASE_PM_POLICY_H_ - -/** - * kbase_pm_policy_init - Initialize power policy framework - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Must be called before calling any other policy function - * - * Return: 0 if the power policy framework was successfully - * initialized, -errno otherwise. - */ -int kbase_pm_policy_init(struct kbase_device *kbdev); - -/** - * kbase_pm_policy_term - Terminate power policy framework - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_policy_term(struct kbase_device *kbdev); - -/** - * kbase_pm_update_active - Update the active power state of the GPU - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Calls into the current power policy - */ -void kbase_pm_update_active(struct kbase_device *kbdev); - -/** - * kbase_pm_update_cores - Update the desired core state of the GPU - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Calls into the current power policy - */ -void kbase_pm_update_cores(struct kbase_device *kbdev); - -/** - * kbase_pm_cores_requested - Check that a power request has been locked into - * the HW. - * @kbdev: Kbase device - * @shader_required: true if shaders are required - * - * Called by the scheduler to check if a power on request has been locked into - * the HW. - * - * Note that there is no guarantee that the cores are actually ready, however - * when the request has been locked into the HW, then it is safe to submit work - * since the HW will wait for the transition to ready. - * - * A reference must first be taken prior to making this call. - * - * Caller must hold the hwaccess_lock. 
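/*
 * A sketch of the scheduler-side check described above (not part of the
 * original file): it assumes the caller already holds kbdev->hwaccess_lock
 * and has taken a PM reference; example_can_submit() is a hypothetical
 * helper name.
 */
static bool example_can_submit(struct kbase_device *kbdev, bool shader_required)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* It is safe to submit only once the power-up request has been locked
	 * into the HW; the HW then holds the job until the cores are ready. */
	return kbase_pm_cores_requested(kbdev, shader_required);
}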
- * - * Return: true if the request to the HW was successfully made else false if the - * request is still pending. - */ -static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, - bool shader_required) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* If the L2 & tiler are not on or pending, then the tiler is not yet - * available, and shaders are definitely not powered. - */ - if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && - kbdev->pm.backend.l2_state != KBASE_L2_ON) - return false; - - if (shader_required && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && - kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) - return false; - - return true; -} - -#endif /* _KBASE_PM_POLICY_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_time.c deleted file mode 100755 index 5e1b761cf43c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/backend/gpu/mali_kbase_time.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include - -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec *ts) -{ - u32 hi1, hi2; - - kbase_pm_request_gpu_cycle_counter(kbdev); - - /* Read hi, lo, hi to ensure that overflow from lo to hi is handled - * correctly */ - do { - hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI)); - *cycle_counter = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_LO)); - hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI)); - *cycle_counter |= (((u64) hi1) << 32); - } while (hi1 != hi2); - - /* Read hi, lo, hi to ensure that overflow from lo to hi is handled - * correctly */ - do { - hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI)); - *system_time = kbase_reg_read(kbdev, - GPU_CONTROL_REG(TIMESTAMP_LO)); - hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI)); - *system_time |= (((u64) hi1) << 32); - } while (hi1 != hi2); - - /* Record the CPU's idea of current time */ - getrawmonotonic(ts); - - kbase_pm_release_gpu_cycle_counter(kbdev); -} - -/** - * kbase_wait_write_flush - Wait for GPU write flush - * @kbdev: Kbase device - * - * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush - * its write buffer. - * - * Only in use for BASE_HW_ISSUE_6367 - * - * Note : If GPU resets occur then the counters are reset to zero, the delay may - * not be as expected. - */ -#ifndef CONFIG_MALI_NO_MALI -void kbase_wait_write_flush(struct kbase_device *kbdev) -{ - u32 base_count = 0; - - /* - * The caller must be holding onto the kctx or the call is from - * userspace. 
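
The removed kbase_backend_get_gpu_time() above samples each 64-bit counter as two 32-bit registers and re-reads the high word so that a low-word rollover between the reads is caught. A generic standalone sketch of that read hi/lo/hi pattern; read32() is a hypothetical accessor standing in for kbase_reg_read():

#include <stdint.h>

/* Read a 64-bit counter exposed as two 32-bit registers. 'hi' and 'lo' are
 * the register offsets. Re-reading the high word detects a low-word wrap
 * between the two reads, exactly as in the removed driver code. */
static uint64_t read_counter64(uint32_t (*read32)(unsigned int reg),
			       unsigned int hi, unsigned int lo)
{
	uint32_t hi1, hi2, lo32;

	do {
		hi1 = read32(hi);
		lo32 = read32(lo);
		hi2 = read32(hi);
	} while (hi1 != hi2);

	return ((uint64_t)hi1 << 32) | lo32;
}
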
- */ - kbase_pm_context_active(kbdev); - kbase_pm_request_gpu_cycle_counter(kbdev); - - while (true) { - u32 new_count; - - new_count = kbase_reg_read(kbdev, - GPU_CONTROL_REG(CYCLE_COUNT_LO)); - /* First time around, just store the count. */ - if (base_count == 0) { - base_count = new_count; - continue; - } - - /* No need to handle wrapping, unsigned maths works for this. */ - if ((new_count - base_count) > 1000) - break; - } - - kbase_pm_release_gpu_cycle_counter(kbdev); - kbase_pm_context_idle(kbdev); -} -#endif /* CONFIG_MALI_NO_MALI */ diff --git a/drivers/gpu/drm/bifrost/midgard/build.bp b/drivers/gpu/drm/bifrost/midgard/build.bp deleted file mode 100755 index 2cf685c0eb66..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/build.bp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- - */ - -/* Kernel-side tests may include mali_kbase's headers. Therefore any config - * options which affect the sizes of any structs (e.g. adding extra members) - * must be included in these defaults, so that the structs are consistent in - * both mali_kbase and the test modules. */ -bob_defaults { - name: "mali_kbase_shared_config_defaults", - no_mali: { - kbuild_options: ["CONFIG_MALI_NO_MALI=y"], - }, - mali_devfreq: { - kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], - }, - mali_midgard_dvfs: { - kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"], - }, - mali_debug: { - kbuild_options: ["CONFIG_MALI_DEBUG=y"], - }, - mali_fpga_bus_logger: { - kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"], - }, - cinstr_job_dump: { - kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"], - }, - cinstr_vector_dump: { - kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], - }, - cinstr_gwt: { - kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], - }, - mali_gator_support: { - kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], - }, - mali_system_trace: { - kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], - }, - mali_pwrsoft_765: { - kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], - }, - kbuild_options: [ - "MALI_UNIT_TEST={{.unit_test_code}}", - "MALI_CUSTOMER_RELEASE={{.release}}", - "MALI_USE_CSF={{.gpu_has_csf}}", - "MALI_KERNEL_TEST_API={{.debug}}", - ], - defaults: ["kernel_defaults"], -} - -bob_kernel_module { - name: "mali_kbase", - srcs: [ - "*.c", - "*.h", - "Kbuild", - "backend/gpu/*.c", - "backend/gpu/*.h", - "backend/gpu/Kbuild", - "ipa/*.c", - "ipa/*.h", - "ipa/Kbuild", - "platform/*.h", - "platform/*/*.c", - "platform/*/*.h", - "platform/*/Kbuild", - "thirdparty/*.c", - ], - kbuild_options: [ - "CONFIG_MALI_KUTF=n", - "CONFIG_MALI_MIDGARD=m", - "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", - "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", - ], - mali_fpga_bus_logger: { - extra_symbols: [ - "bus_logger", - ], - }, - mali_corestack: { - kbuild_options: ["CONFIG_MALI_CORESTACK=y"], - }, - mali_error_inject: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], - }, - mali_error_inject_random: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], - }, - cinstr_secondary_hwc: { - kbuild_options: 
["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], - }, - mali_2mb_alloc: { - kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], - }, - gpu_has_csf: { - srcs: [ - "csf/*.c", - "csf/*.h", - "csf/Kbuild", - ], - }, - defaults: ["mali_kbase_shared_config_defaults"], -} diff --git a/drivers/gpu/drm/bifrost/midgard/docs/Doxyfile b/drivers/gpu/drm/bifrost/midgard/docs/Doxyfile deleted file mode 100755 index 6498dcbc1840..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/docs/Doxyfile +++ /dev/null @@ -1,132 +0,0 @@ -# -# (C) COPYRIGHT 2011-2013, 2015, 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -############################################################################## - -# This file contains per-module Doxygen configuration. Please do not add -# extra settings to this file without consulting all stakeholders, as they -# may cause override project-wide settings. -# -# Additionally, when defining aliases, macros, sections etc, use the module -# name as a prefix e.g. gles_my_alias. - -############################################################################## - -@INCLUDE = ../../bldsys/Doxyfile_common - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT += ../../kernel/drivers/gpu/arm/midgard/ - -############################################################################## -# Everything below here is optional, and in most cases not required -############################################################################## - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES += - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS += - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS += - -# The EXCLUDE tag can be used to specify files and/or directories that should -# excluded from the INPUT source files. 
This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile - - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS += - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS += - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -EXAMPLE_PATH += - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain image that are included in the documentation (see -# the \image command). - -IMAGE_PATH += - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH += - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED += - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED += - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs - diff --git a/drivers/gpu/drm/bifrost/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/drm/bifrost/midgard/docs/policy_operation_diagram.dot deleted file mode 100755 index a15b55811482..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/docs/policy_operation_diagram.dot +++ /dev/null @@ -1,117 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -digraph policy_objects_diagram { - rankdir=LR; - size="12,8"; - compound=true; - - node [ shape = box ]; - - subgraph cluster_policy_queues { - low_queue [ shape=record label = "LowP | {ctx_lo | ... | ctx_i | ... | ctx_hi}" ]; - queues_middle_sep [ label="" shape=plaintext width=0 height=0 ]; - - rt_queue [ shape=record label = "RT | {ctx_lo | ... | ctx_j | ... | ctx_hi}" ]; - - label = "Policy's Queue(s)"; - } - - call_enqueue [ shape=plaintext label="enqueue_ctx()" ]; - - { - rank=same; - ordering=out; - call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ]; - call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ]; - - call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ]; - } - - subgraph cluster_runpool { - - as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ]; - as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ]; - as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ]; - as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ]; - - label = "Policy's Run Pool"; - } - - { - rank=same; - call_jdequeue [ shape=plaintext label="dequeue_job()" ]; - sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ]; - } - - { - rank=same; - ordering=out; - sstop [ shape=ellipse label="SS-Timer expires" ] - jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; - - irq [ label="IRQ" shape=ellipse ]; - - job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ]; - } - - hstop [ shape=ellipse label="HS-Timer expires" ] - - /* - * Edges - */ - - call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ]; - - low_queue:qr -> call_dequeue:w; - rt_queue:qr -> call_dequeue:w; - - call_dequeue -> as1 [lhead=cluster_runpool]; - - as1->call_jdequeue [ltail=cluster_runpool]; - call_jdequeue->jobslots:0; - call_jdequeue->sstop_dotfixup [ arrowhead=none]; - sstop_dotfixup->sstop [label="Spawn SS-Timer"]; - sstop->jobslots [label="SoftStop"]; - sstop->hstop [label="Spawn HS-Timer"]; - hstop->jobslots:ne [label="HardStop"]; - - - as3->call_ctxfinish:ne [ ltail=cluster_runpool ]; - call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ]; - - call_ctxfinish->call_ctxdone [constraint=false]; - - call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false]; - - - { - jobslots->irq [constraint=false]; - - irq->job_finish [constraint=false]; - } - - irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ]; - -} diff --git a/drivers/gpu/drm/bifrost/midgard/docs/policy_overview.dot b/drivers/gpu/drm/bifrost/midgard/docs/policy_overview.dot deleted file mode 100755 index 
6b8733593191..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/docs/policy_overview.dot +++ /dev/null @@ -1,68 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -digraph policy_objects_diagram { - rankdir=LR - size="6,6" - compound=true; - - node [ shape = box ]; - - call_enqueue [ shape=plaintext label="enqueue ctx" ]; - - - policy_queue [ label="Policy's Queue" ]; - - { - rank=same; - runpool [ label="Policy's Run Pool" ]; - - ctx_finish [ label="ctx finished" ]; - } - - { - rank=same; - jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; - - job_finish [ label="Job finished" ]; - } - - - - /* - * Edges - */ - - call_enqueue -> policy_queue; - - policy_queue->runpool [label="dequeue ctx" weight=0.1]; - runpool->policy_queue [label="requeue ctx" weight=0.1]; - - runpool->ctx_finish [ style=dotted ]; - - runpool->jobslots [label="dequeue job" weight=0.1]; - jobslots->runpool [label="requeue job" weight=0.1]; - - jobslots->job_finish [ style=dotted ]; -} diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/Kbuild b/drivers/gpu/drm/bifrost/midgard/ipa/Kbuild deleted file mode 100755 index 3d9cf8006b80..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/Kbuild +++ /dev/null @@ -1,28 +0,0 @@ -# -# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -mali_kbase-y += \ - ipa/mali_kbase_ipa_simple.o \ - ipa/mali_kbase_ipa.o \ - ipa/mali_kbase_ipa_vinstr_g7x.o \ - ipa/mali_kbase_ipa_vinstr_common.o - -mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o \ No newline at end of file diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa.c deleted file mode 100755 index 9da2878e1417..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa.c +++ /dev/null @@ -1,664 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include -#include -#include -#include "mali_kbase.h" -#include "mali_kbase_ipa.h" -#include "mali_kbase_ipa_debugfs.h" -#include "mali_kbase_ipa_simple.h" -#include "backend/gpu/mali_kbase_pm_internal.h" - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) -#include -#else -#include -#define dev_pm_opp_find_freq_exact opp_find_freq_exact -#define dev_pm_opp_get_voltage opp_get_voltage -#define dev_pm_opp opp -#endif - -#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" - -static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { - &kbase_simple_ipa_model_ops, - &kbase_g71_ipa_model_ops, - &kbase_g72_ipa_model_ops, - &kbase_g76_ipa_model_ops, - &kbase_g52_ipa_model_ops, - &kbase_g52_r1_ipa_model_ops, - &kbase_g51_ipa_model_ops -}; - -int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) -{ - int err = 0; - - lockdep_assert_held(&model->kbdev->ipa.lock); - - if (model->ops->recalculate) { - err = model->ops->recalculate(model); - if (err) { - dev_err(model->kbdev->dev, - "recalculation of power model %s returned error %d\n", - model->ops->name, err); - } - } - - return err; -} - -const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, - const char *name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { - const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; - - if (!strcmp(ops->name, name)) - return ops; - } - - dev_err(kbdev->dev, "power model \'%s\' not found\n", name); - - return NULL; -} -KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); - -const char *kbase_ipa_model_name_from_id(u32 gpu_id) -{ - const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> - GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - if (GPU_ID_IS_NEW_FORMAT(prod_id)) { - switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { - case GPU_ID2_PRODUCT_TMIX: - return "mali-g71-power-model"; - case GPU_ID2_PRODUCT_THEX: - return "mali-g72-power-model"; - case GPU_ID2_PRODUCT_TNOX: - return "mali-g76-power-model"; - case GPU_ID2_PRODUCT_TSIX: - return "mali-g51-power-model"; - case GPU_ID2_PRODUCT_TGOX: - if ((gpu_id & GPU_ID2_VERSION_MAJOR) == - (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) - /* g52 aliased to g76 power-model's ops */ - return "mali-g52-power-model"; - else - return "mali-g52_r1-power-model"; - default: - return KBASE_IPA_FALLBACK_MODEL_NAME; - } - } - - return KBASE_IPA_FALLBACK_MODEL_NAME; -} -KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); - -static struct device_node *get_model_dt_node(struct kbase_ipa_model *model) -{ - struct device_node *model_dt_node; - char compat_string[64]; - - snprintf(compat_string, sizeof(compat_string), "arm,%s", - model->ops->name); - - /* of_find_compatible_node() will call of_node_put() on the root node, - * so take a reference on it first. 
- */ - of_node_get(model->kbdev->dev->of_node); - model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, - NULL, compat_string); - if (!model_dt_node && !model->missing_dt_node_warning) { - dev_warn(model->kbdev->dev, - "Couldn't find power_model DT node matching \'%s\'\n", - compat_string); - model->missing_dt_node_warning = true; - } - - return model_dt_node; -} - -int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, - const char *name, s32 *addr, - size_t num_elems, bool dt_required) -{ - int err, i; - struct device_node *model_dt_node = get_model_dt_node(model); - char *origin; - - err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); - /* We're done with model_dt_node now, so drop the reference taken in - * get_model_dt_node()/of_find_compatible_node(). - */ - of_node_put(model_dt_node); - - if (err && dt_required) { - memset(addr, 0, sizeof(s32) * num_elems); - dev_warn(model->kbdev->dev, - "Error %d, no DT entry: %s.%s = %zu*[0]\n", - err, model->ops->name, name, num_elems); - origin = "zero"; - } else if (err && !dt_required) { - origin = "default"; - } else /* !err */ { - origin = "DT"; - } - - /* Create a unique debugfs entry for each element */ - for (i = 0; i < num_elems; ++i) { - char elem_name[32]; - - if (num_elems == 1) - snprintf(elem_name, sizeof(elem_name), "%s", name); - else - snprintf(elem_name, sizeof(elem_name), "%s.%d", - name, i); - - dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", - model->ops->name, elem_name, addr[i], origin); - - err = kbase_ipa_model_param_add(model, elem_name, - &addr[i], sizeof(s32), - PARAM_TYPE_S32); - if (err) - goto exit; - } -exit: - return err; -} - -int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, - const char *name, char *addr, - size_t size, bool dt_required) -{ - int err; - struct device_node *model_dt_node = get_model_dt_node(model); - const char *string_prop_value; - char *origin; - - err = of_property_read_string(model_dt_node, name, - &string_prop_value); - - /* We're done with model_dt_node now, so drop the reference taken in - * get_model_dt_node()/of_find_compatible_node(). 
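
The get_model_dt_node() helper above only works because of_find_compatible_node() drops a reference on its starting node, hence the extra of_node_get() beforehand and the of_node_put() once the property has been read. A minimal sketch of the same reference discipline; the compatible string matches the fallback model's name, but the property name and the error handling are illustrative only:

#include <linux/errno.h>
#include <linux/of.h>

/* Read a u32 array property from a child node matching a given compatible
 * string; 'np' is the device's of_node. */
static int read_model_param(struct device_node *np, u32 *vals, size_t n)
{
	struct device_node *model_node;
	int err;

	/* of_find_compatible_node() calls of_node_put() on its 'from'
	 * argument, so pin it first. */
	of_node_get(np);
	model_node = of_find_compatible_node(np, NULL,
					     "arm,mali-simple-power-model");
	if (!model_node)
		return -ENOENT;

	err = of_property_read_u32_array(model_node, "static-coefficient",
					 vals, n);

	of_node_put(model_node);	/* drop the reference we now own */
	return err;
}
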
- */ - of_node_put(model_dt_node); - - if (err && dt_required) { - strncpy(addr, "", size - 1); - dev_warn(model->kbdev->dev, - "Error %d, no DT entry: %s.%s = \'%s\'\n", - err, model->ops->name, name, addr); - err = 0; - origin = "zero"; - } else if (err && !dt_required) { - origin = "default"; - } else /* !err */ { - strncpy(addr, string_prop_value, size - 1); - origin = "DT"; - } - - addr[size - 1] = '\0'; - - dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", - model->ops->name, name, string_prop_value, origin); - - err = kbase_ipa_model_param_add(model, name, addr, size, - PARAM_TYPE_STRING); - return err; -} - -void kbase_ipa_term_model(struct kbase_ipa_model *model) -{ - if (!model) - return; - - lockdep_assert_held(&model->kbdev->ipa.lock); - - if (model->ops->term) - model->ops->term(model); - - kbase_ipa_model_param_free_all(model); - - kfree(model); -} -KBASE_EXPORT_TEST_API(kbase_ipa_term_model); - -struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - const struct kbase_ipa_model_ops *ops) -{ - struct kbase_ipa_model *model; - int err; - - lockdep_assert_held(&kbdev->ipa.lock); - - if (!ops || !ops->name) - return NULL; - - model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); - if (!model) - return NULL; - - model->kbdev = kbdev; - model->ops = ops; - INIT_LIST_HEAD(&model->params); - - err = model->ops->init(model); - if (err) { - dev_err(kbdev->dev, - "init of power model \'%s\' returned error %d\n", - ops->name, err); - kfree(model); - return NULL; - } - - err = kbase_ipa_model_recalculate(model); - if (err) { - kbase_ipa_term_model(model); - return NULL; - } - - return model; -} -KBASE_EXPORT_TEST_API(kbase_ipa_init_model); - -static void kbase_ipa_term_locked(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->ipa.lock); - - /* Clean up the models */ - if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) - kbase_ipa_term_model(kbdev->ipa.configured_model); - kbase_ipa_term_model(kbdev->ipa.fallback_model); - - kbdev->ipa.configured_model = NULL; - kbdev->ipa.fallback_model = NULL; -} - -int kbase_ipa_init(struct kbase_device *kbdev) -{ - - const char *model_name; - const struct kbase_ipa_model_ops *ops; - struct kbase_ipa_model *default_model = NULL; - int err; - - mutex_init(&kbdev->ipa.lock); - /* - * Lock during init to avoid warnings from lockdep_assert_held (there - * shouldn't be any concurrent access yet). 
- */ - mutex_lock(&kbdev->ipa.lock); - - /* The simple IPA model must *always* be present.*/ - ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); - - default_model = kbase_ipa_init_model(kbdev, ops); - if (!default_model) { - err = -EINVAL; - goto end; - } - - kbdev->ipa.fallback_model = default_model; - err = of_property_read_string(kbdev->dev->of_node, - "ipa-model", - &model_name); - if (err) { - /* Attempt to load a match from GPU-ID */ - u32 gpu_id; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - model_name = kbase_ipa_model_name_from_id(gpu_id); - dev_dbg(kbdev->dev, - "Inferring model from GPU ID 0x%x: \'%s\'\n", - gpu_id, model_name); - err = 0; - } else { - dev_dbg(kbdev->dev, - "Using ipa-model parameter from DT: \'%s\'\n", - model_name); - } - - if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { - ops = kbase_ipa_model_ops_find(kbdev, model_name); - kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); - if (!kbdev->ipa.configured_model) { - dev_warn(kbdev->dev, - "Failed to initialize ipa-model: \'%s\'\n" - "Falling back on default model\n", - model_name); - kbdev->ipa.configured_model = default_model; - } - } else { - kbdev->ipa.configured_model = default_model; - } - -end: - if (err) - kbase_ipa_term_locked(kbdev); - else - dev_info(kbdev->dev, - "Using configured power model %s, and fallback %s\n", - kbdev->ipa.configured_model->ops->name, - kbdev->ipa.fallback_model->ops->name); - - mutex_unlock(&kbdev->ipa.lock); - return err; -} -KBASE_EXPORT_TEST_API(kbase_ipa_init); - -void kbase_ipa_term(struct kbase_device *kbdev) -{ - mutex_lock(&kbdev->ipa.lock); - kbase_ipa_term_locked(kbdev); - mutex_unlock(&kbdev->ipa.lock); - - mutex_destroy(&kbdev->ipa.lock); -} -KBASE_EXPORT_TEST_API(kbase_ipa_term); - -/** - * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP - * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range - * 0 < c < 2^26 to prevent overflow. - * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) - * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) - * - * Keep a record of the approximate range of each value at every stage of the - * calculation, to ensure we don't overflow. This makes heavy use of the - * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual - * calculations in decimal for increased accuracy. - * - * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) - */ -static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, - const u32 voltage) -{ - /* Range: 2^8 < v2 < 2^16 m(V^2) */ - const u32 v2 = (voltage * voltage) / 1000; - - /* Range: 2^3 < f_MHz < 2^10 MHz */ - const u32 f_MHz = freq / 1000000; - - /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ - const u32 v2f_big = v2 * f_MHz; - - /* Range: 2^1 < v2f < 2^16 MHz V^2 */ - const u32 v2f = v2f_big / 1000; - - /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. - * Must be < 2^42 to avoid overflowing the return value. */ - const u64 v2fc = (u64) c * (u64) v2f; - - /* Range: 0 < v2fc / 1000 < 2^13 mW */ - return div_u64(v2fc, 1000); -} - -/** - * kbase_scale_static_power() - Scale a static power coefficient to an OPP - * @c: Static model coefficient, in uW/V^3. Should be in range - * 0 < c < 2^32 to prevent overflow. - * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) - * - * Return: Power consumption, in mW. 
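
The staged arithmetic in kbase_scale_dynamic_power() above keeps every intermediate inside 32 bits until the final 64-bit multiply. The same computation as a standalone program, with an assumed coefficient and operating point rather than values from any real platform:

#include <stdint.h>
#include <stdio.h>

/* P_dyn = c * f * V^2, with c in pW/(Hz V^2), f in Hz, V in mV; result in mW.
 * Mirrors the staging in the removed kbase_scale_dynamic_power(); the kernel
 * version uses div_u64() for the final 64-bit division. */
static uint32_t scale_dynamic_power(uint32_t c, uint32_t freq_hz, uint32_t volt_mv)
{
	uint32_t v2 = (volt_mv * volt_mv) / 1000;   /* m(V^2) */
	uint32_t f_mhz = freq_hz / 1000000;         /* MHz */
	uint32_t v2f = (v2 * f_mhz) / 1000;         /* MHz V^2 */
	uint64_t v2fc = (uint64_t)c * v2f;          /* uW */

	return (uint32_t)(v2fc / 1000);             /* mW */
}

int main(void)
{
	/* Assumed OPP: 600 MHz at 900 mV with c = 4687 pW/(Hz V^2). */
	printf("%u mW\n", scale_dynamic_power(4687, 600000000, 900));
	return 0;
}
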
Range: 0 < p < 2^13 (0W to ~8W) - */ -u32 kbase_scale_static_power(const u32 c, const u32 voltage) -{ - /* Range: 2^8 < v2 < 2^16 m(V^2) */ - const u32 v2 = (voltage * voltage) / 1000; - - /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ - const u32 v3_big = v2 * voltage; - - /* Range: 2^7 < v3 < 2^19 m(V^3) */ - const u32 v3 = v3_big / 1000; - - /* - * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. - * The result should be < 2^52 to avoid overflowing the return value. - */ - const u64 v3c_big = (u64) c * (u64) v3; - - /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ - return div_u64(v3c_big, 1000000); -} - -void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* Record the event of GPU entering protected mode. */ - kbdev->ipa_protection_mode_switched = true; -} - -static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) -{ - struct kbase_ipa_model *model; - unsigned long flags; - - lockdep_assert_held(&kbdev->ipa.lock); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (kbdev->ipa_protection_mode_switched || - kbdev->ipa.force_fallback_model) - model = kbdev->ipa.fallback_model; - else - model = kbdev->ipa.configured_model; - - /* - * Having taken cognizance of the fact that whether GPU earlier - * protected mode or not, the event can be now reset (if GPU is not - * currently in protected mode) so that configured model is used - * for the next sample. - */ - if (!kbdev->protected_mode) - kbdev->ipa_protection_mode_switched = false; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return model; -} - -static u32 get_static_power_locked(struct kbase_device *kbdev, - struct kbase_ipa_model *model, - unsigned long voltage) -{ - u32 power = 0; - int err; - u32 power_coeff; - - lockdep_assert_held(&model->kbdev->ipa.lock); - - if (!model->ops->get_static_coeff) - model = kbdev->ipa.fallback_model; - - if (model->ops->get_static_coeff) { - err = model->ops->get_static_coeff(model, &power_coeff); - if (!err) - power = kbase_scale_static_power(power_coeff, - (u32) voltage); - } - - return power; -} - -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -static unsigned long kbase_get_static_power(struct devfreq *df, - unsigned long voltage) -#else -static unsigned long kbase_get_static_power(unsigned long voltage) -#endif -{ - struct kbase_ipa_model *model; - u32 power = 0; -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) - struct kbase_device *kbdev = dev_get_drvdata(&df->dev); -#else - struct kbase_device *kbdev = kbase_find_device(-1); -#endif - - if (!kbdev) - return 0ul; - - mutex_lock(&kbdev->ipa.lock); - - model = get_current_model(kbdev); - power = get_static_power_locked(kbdev, model, voltage); - - mutex_unlock(&kbdev->ipa.lock); - -#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) - kbase_release_device(kbdev); -#endif - - return power; -} - -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) -static unsigned long kbase_get_dynamic_power(struct devfreq *df, - unsigned long freq, - unsigned long voltage) -#else -static unsigned long kbase_get_dynamic_power(unsigned long freq, - unsigned long voltage) -#endif -{ - struct kbase_ipa_model *model; - u32 power_coeff = 0, power = 0; - int err = 0; -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) - struct kbase_device 
*kbdev = dev_get_drvdata(&df->dev); -#else - struct kbase_device *kbdev = kbase_find_device(-1); -#endif - - if (!kbdev) - return 0ul; - - mutex_lock(&kbdev->ipa.lock); - - model = kbdev->ipa.fallback_model; - - err = model->ops->get_dynamic_coeff(model, &power_coeff); - - if (!err) - power = kbase_scale_dynamic_power(power_coeff, freq, voltage); - else - dev_err_ratelimited(kbdev->dev, - "Model %s returned error code %d\n", - model->ops->name, err); - - mutex_unlock(&kbdev->ipa.lock); - -#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) - kbase_release_device(kbdev); -#endif - - return power; -} - -int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, - unsigned long freq, - unsigned long voltage) -{ - struct kbase_ipa_model *model; - u32 power_coeff = 0; - int err = 0; - struct kbasep_pm_metrics diff; - u64 total_time; - - lockdep_assert_held(&kbdev->ipa.lock); - - kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff); - - model = get_current_model(kbdev); - - err = model->ops->get_dynamic_coeff(model, &power_coeff); - - /* If the counter model returns an error (e.g. switching back to - * protected mode and failing to read counters, or a counter sample - * with too few cycles), revert to the fallback model. - */ - if (err && model != kbdev->ipa.fallback_model) { - model = kbdev->ipa.fallback_model; - err = model->ops->get_dynamic_coeff(model, &power_coeff); - } - - if (err) - return err; - - *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); - - /* time_busy / total_time cannot be >1, so assigning the 64-bit - * result of div_u64 to *power cannot overflow. - */ - total_time = diff.time_busy + (u64) diff.time_idle; - *power = div_u64(*power * (u64) diff.time_busy, - max(total_time, 1ull)); - - *power += get_static_power_locked(kbdev, model, voltage); - - return err; -} -KBASE_EXPORT_TEST_API(kbase_get_real_power_locked); - -int kbase_get_real_power(struct devfreq *df, u32 *power, - unsigned long freq, - unsigned long voltage) -{ - int ret; - struct kbase_device *kbdev = dev_get_drvdata(&df->dev); - - if (!kbdev) - return -ENODEV; - - mutex_lock(&kbdev->ipa.lock); - ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); - mutex_unlock(&kbdev->ipa.lock); - - return ret; -} -KBASE_EXPORT_TEST_API(kbase_get_real_power); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -struct devfreq_cooling_ops kbase_ipa_power_model_ops = { -#else -struct devfreq_cooling_power kbase_ipa_power_model_ops = { -#endif - .get_static_power = &kbase_get_static_power, - .get_dynamic_power = &kbase_get_dynamic_power, -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) - .get_real_power = &kbase_get_real_power, -#endif -}; -KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa.h deleted file mode 100755 index 746204813262..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa.h +++ /dev/null @@ -1,250 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
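
kbase_get_real_power_locked() above weights the dynamic term by the busy fraction of the last DVFS sample before adding the static term. A small standalone illustration of that weighting, with made-up sample numbers:

#include <stdint.h>
#include <stdio.h>

/* Weight dynamic power by utilisation and add static power, as in the
 * removed kbase_get_real_power_locked(). Power inputs are in mW; busy and
 * idle times are arbitrary units from the PM metrics. */
static uint32_t real_power_mw(uint32_t dyn_mw, uint32_t static_mw,
			      uint64_t time_busy, uint64_t time_idle)
{
	uint64_t total = time_busy + time_idle;

	if (total == 0)			/* the kernel code clamps with max(total, 1) */
		total = 1;

	return (uint32_t)(((uint64_t)dyn_mw * time_busy) / total) + static_mw;
}

int main(void)
{
	/* Assumed sample: 2277 mW dynamic at full load, 350 mW static,
	 * GPU busy 3 ms out of a 10 ms window. */
	printf("%u mW\n", real_power_mw(2277, 350, 3000, 7000));
	return 0;
}
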
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_IPA_H_ -#define _KBASE_IPA_H_ - -#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) - -struct devfreq; - -/** - * struct kbase_ipa_model - Object describing a particular IPA model. - * @kbdev: pointer to kbase device - * @model_data: opaque pointer to model specific data, accessed - * only by model specific methods. - * @ops: pointer to object containing model specific methods. - * @params: head of the list of debugfs params added for model - * @missing_dt_node_warning: flag to limit the matching power model DT not found - * warning to once. - */ -struct kbase_ipa_model { - struct kbase_device *kbdev; - void *model_data; - const struct kbase_ipa_model_ops *ops; - struct list_head params; - bool missing_dt_node_warning; -}; - -/** - * kbase_ipa_model_add_param_s32 - Add an integer model parameter - * @model: pointer to IPA model - * @name: name of corresponding debugfs entry - * @addr: address where the value is stored - * @num_elems: number of elements (1 if not an array) - * @dt_required: if false, a corresponding devicetree entry is not required, - * and the current value will be used. If true, a warning is - * output and the data is zeroed - * - * Return: 0 on success, or an error code - */ -int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, - const char *name, s32 *addr, - size_t num_elems, bool dt_required); - -/** - * kbase_ipa_model_add_param_string - Add a string model parameter - * @model: pointer to IPA model - * @name: name of corresponding debugfs entry - * @addr: address where the value is stored - * @size: size, in bytes, of the value storage (so the maximum string - * length is size - 1) - * @dt_required: if false, a corresponding devicetree entry is not required, - * and the current value will be used. If true, a warning is - * output and the data is zeroed - * - * Return: 0 on success, or an error code - */ -int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, - const char *name, char *addr, - size_t size, bool dt_required); - -struct kbase_ipa_model_ops { - char *name; - /* The init, recalculate and term ops on the default model are always - * called. However, all the other models are only invoked if the model - * is selected in the device tree. Otherwise they are never - * initialized. Additional resources can be acquired by models in - * init(), however they must be terminated in the term(). - */ - int (*init)(struct kbase_ipa_model *model); - /* Called immediately after init(), or when a parameter is changed, so - * that any coefficients derived from model parameters can be - * recalculated. */ - int (*recalculate)(struct kbase_ipa_model *model); - void (*term)(struct kbase_ipa_model *model); - /* - * get_dynamic_coeff() - calculate dynamic power coefficient - * @model: pointer to model - * @coeffp: pointer to return value location - * - * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which - * is then scaled by the IPA framework according to the current OPP's - * frequency and voltage. 
- * - * Return: 0 on success, or an error code. - */ - int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp); - /* - * get_static_coeff() - calculate static power coefficient - * @model: pointer to model - * @coeffp: pointer to return value location - * - * Calculate a static power coefficient, with units uW/(V^3), which is - * scaled by the IPA framework according to the current OPP's voltage. - * - * Return: 0 on success, or an error code. - */ - int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); -}; - -/** - * kbase_ipa_init - Initialize the IPA feature - * @kbdev: pointer to kbase device - * - * simple IPA power model is initialized as a fallback model and if that - * initialization fails then IPA is not used. - * The device tree is read for the name of ipa model to be used, by using the - * property string "ipa-model". If that ipa model is supported then it is - * initialized but if the initialization fails then simple power model is used. - * - * Return: 0 on success, negative -errno on error - */ -int kbase_ipa_init(struct kbase_device *kbdev); - -/** - * kbase_ipa_term - Terminate the IPA feature - * @kbdev: pointer to kbase device - * - * Both simple IPA power model and model retrieved from device tree are - * terminated. - */ -void kbase_ipa_term(struct kbase_device *kbdev); - -/** - * kbase_ipa_model_recalculate - Recalculate the model coefficients - * @model: pointer to the IPA model object, already initialized - * - * It shall be called immediately after the model has been initialized - * or when the model parameter has changed, so that any coefficients - * derived from parameters can be recalculated. - * Its a wrapper for the module specific recalculate() method. - * - * Return: 0 on success, negative -errno on error - */ -int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); - -/** - * kbase_ipa_model_ops_find - Lookup an IPA model using its name - * @kbdev: pointer to kbase device - * @name: name of model to lookup - * - * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed. - */ -const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, - const char *name); - -/** - * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID - * @gpu_id: GPU ID of GPU the model will be used for - * - * Return: The name of the appropriate counter-based model, or the name of the - * fallback model if no counter model exists. - */ -const char *kbase_ipa_model_name_from_id(u32 gpu_id); - -/** - * kbase_ipa_init_model - Initilaize the particular IPA model - * @kbdev: pointer to kbase device - * @ops: pointer to object containing model specific methods. - * - * Initialize the model corresponding to the @ops pointer passed. - * The init() method specified in @ops would be called. - * - * Return: pointer to kbase_ipa_model on success, NULL on error - */ -struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - const struct kbase_ipa_model_ops *ops); -/** - * kbase_ipa_term_model - Terminate the particular IPA model - * @model: pointer to the IPA model object, already initialized - * - * Terminate the model, using the term() method. - * Module specific parameters would be freed. - */ -void kbase_ipa_term_model(struct kbase_ipa_model *model); - -/** - * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into - * protected mode - * @kbdev: pointer to kbase device - * - * Makes IPA aware of the GPU switching to protected mode. 
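
The kbase_ipa_model_ops table documented above is the whole contract a power model has to satisfy; in the code being removed, init() and get_dynamic_coeff() are invoked without NULL checks, while recalculate(), term() and get_static_coeff() are optional. A hypothetical minimal implementation against that interface; the model name and both coefficient values are invented for illustration:

#include "mali_kbase.h"
#include "mali_kbase_ipa.h"

/* Hypothetical fixed-coefficient model: no hardware counters, so only the
 * mandatory hooks are filled in. The values are illustrative, not measured. */
static int fixed_model_init(struct kbase_ipa_model *model)
{
	return 0;	/* nothing to allocate */
}

static int fixed_model_get_static_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
	*coeffp = 2427000;	/* uW/V^3, assumed */
	return 0;
}

static int fixed_model_get_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
	*coeffp = 4687;		/* pW/(Hz V^2), assumed */
	return 0;
}

const struct kbase_ipa_model_ops fixed_ipa_model_ops = {
	.name = "mali-fixed-power-model",
	.init = fixed_model_init,
	.get_static_coeff = fixed_model_get_static_coeff,
	.get_dynamic_coeff = fixed_model_get_dynamic_coeff,
};
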
- */ -void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); - -extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; - -/** - * kbase_get_real_power() - get the real power consumption of the GPU - * @df: dynamic voltage and frequency scaling information for the GPU. - * @power: where to store the power consumption, in mW. - * @freq: a frequency, in HZ. - * @voltage: a voltage, in mV. - * - * The returned value incorporates both static and dynamic power consumption. - * - * Return: 0 on success, or an error code. - */ -int kbase_get_real_power(struct devfreq *df, u32 *power, - unsigned long freq, - unsigned long voltage); - -#if MALI_UNIT_TEST -/* Called by kbase_get_real_power() to invoke the power models. - * Must be called with kbdev->ipa.lock held. - * This function is only exposed for use by unit tests. - */ -int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, - unsigned long freq, - unsigned long voltage); -#endif /* MALI_UNIT_TEST */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; -#else -extern struct devfreq_cooling_power kbase_ipa_power_model_ops; -#endif - -#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ - -static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) -{ } - -#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_debugfs.c deleted file mode 100755 index 6e8c23cb7163..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_debugfs.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include - -#include "mali_kbase.h" -#include "mali_kbase_ipa.h" -#include "mali_kbase_ipa_debugfs.h" - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) -#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE -#endif - -struct kbase_ipa_model_param { - char *name; - union { - void *voidp; - s32 *s32p; - char *str; - } addr; - size_t size; - enum kbase_ipa_model_param_type type; - struct kbase_ipa_model *model; - struct list_head link; -}; - -static int param_int_get(void *data, u64 *val) -{ - struct kbase_ipa_model_param *param = data; - - mutex_lock(¶m->model->kbdev->ipa.lock); - *(s64 *) val = *param->addr.s32p; - mutex_unlock(¶m->model->kbdev->ipa.lock); - - return 0; -} - -static int param_int_set(void *data, u64 val) -{ - struct kbase_ipa_model_param *param = data; - struct kbase_ipa_model *model = param->model; - s64 sval = (s64) val; - s32 old_val; - int err = 0; - - if (sval < S32_MIN || sval > S32_MAX) - return -ERANGE; - - mutex_lock(¶m->model->kbdev->ipa.lock); - old_val = *param->addr.s32p; - *param->addr.s32p = val; - err = kbase_ipa_model_recalculate(model); - if (err < 0) - *param->addr.s32p = old_val; - mutex_unlock(¶m->model->kbdev->ipa.lock); - - return err; -} - -DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); - -static ssize_t param_string_get(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct kbase_ipa_model_param *param = file->private_data; - ssize_t ret; - size_t len; - - mutex_lock(¶m->model->kbdev->ipa.lock); - len = strnlen(param->addr.str, param->size - 1) + 1; - ret = simple_read_from_buffer(user_buf, count, ppos, - param->addr.str, len); - mutex_unlock(¶m->model->kbdev->ipa.lock); - - return ret; -} - -static ssize_t param_string_set(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct kbase_ipa_model_param *param = file->private_data; - struct kbase_ipa_model *model = param->model; - char *old_str = NULL; - ssize_t ret = count; - size_t buf_size; - int err; - - mutex_lock(&model->kbdev->ipa.lock); - - if (count > param->size) { - ret = -EINVAL; - goto end; - } - - old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL); - if (!old_str) { - ret = -ENOMEM; - goto end; - } - - buf_size = min(param->size - 1, count); - if (copy_from_user(param->addr.str, user_buf, buf_size)) { - ret = -EFAULT; - goto end; - } - - param->addr.str[buf_size] = '\0'; - - err = kbase_ipa_model_recalculate(model); - if (err < 0) { - ret = err; - strlcpy(param->addr.str, old_str, param->size); - } - -end: - kfree(old_str); - mutex_unlock(&model->kbdev->ipa.lock); - - return ret; -} - -static const struct file_operations fops_string = { - .read = param_string_get, - .write = param_string_set, - .open = simple_open, - .llseek = default_llseek, -}; - -int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, - void *addr, size_t size, - enum kbase_ipa_model_param_type type) -{ - struct kbase_ipa_model_param *param; - - param = kzalloc(sizeof(*param), GFP_KERNEL); - - if (!param) - return -ENOMEM; - - /* 'name' is stack-allocated for array elements, so copy it into - * heap-allocated storage */ - param->name = kstrdup(name, GFP_KERNEL); - - if (!param->name) { - kfree(param); - return -ENOMEM; - } - - param->addr.voidp = addr; - param->size = size; - param->type = type; - param->model = model; - - list_add(¶m->link, &model->params); - - return 0; -} - -void 
kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) -{ - struct kbase_ipa_model_param *param_p, *param_n; - - list_for_each_entry_safe(param_p, param_n, &model->params, link) { - list_del(¶m_p->link); - kfree(param_p->name); - kfree(param_p); - } -} - -static int force_fallback_model_get(void *data, u64 *val) -{ - struct kbase_device *kbdev = data; - - mutex_lock(&kbdev->ipa.lock); - *val = kbdev->ipa.force_fallback_model; - mutex_unlock(&kbdev->ipa.lock); - - return 0; -} - -static int force_fallback_model_set(void *data, u64 val) -{ - struct kbase_device *kbdev = data; - - mutex_lock(&kbdev->ipa.lock); - kbdev->ipa.force_fallback_model = (val ? true : false); - mutex_unlock(&kbdev->ipa.lock); - - return 0; -} - -DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, - force_fallback_model_get, - force_fallback_model_set, - "%llu\n"); - -static int current_power_get(void *data, u64 *val) -{ - struct kbase_device *kbdev = data; - struct devfreq *df = kbdev->devfreq; - u32 power; - - kbase_pm_context_active(kbdev); - kbase_get_real_power(df, &power, - kbdev->current_nominal_freq, (kbdev->current_voltage / 1000)); - kbase_pm_context_idle(kbdev); - - *val = power; - - return 0; -} -DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n"); - -static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) -{ - struct list_head *it; - struct dentry *dir; - - lockdep_assert_held(&model->kbdev->ipa.lock); - - dir = debugfs_create_dir(model->ops->name, - model->kbdev->mali_debugfs_directory); - - if (!dir) { - dev_err(model->kbdev->dev, - "Couldn't create mali debugfs %s directory", - model->ops->name); - return; - } - - list_for_each(it, &model->params) { - struct kbase_ipa_model_param *param = - list_entry(it, - struct kbase_ipa_model_param, - link); - const struct file_operations *fops = NULL; - - switch (param->type) { - case PARAM_TYPE_S32: - fops = &fops_s32; - break; - case PARAM_TYPE_STRING: - fops = &fops_string; - break; - } - - if (unlikely(!fops)) { - dev_err(model->kbdev->dev, - "Type not set for %s parameter %s\n", - model->ops->name, param->name); - } else { - debugfs_create_file(param->name, S_IRUGO | S_IWUSR, - dir, param, fops); - } - } -} - -void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, - const char *name, s32 val) -{ - struct kbase_ipa_model_param *param; - - mutex_lock(&model->kbdev->ipa.lock); - - list_for_each_entry(param, &model->params, link) { - if (!strcmp(param->name, name)) { - if (param->type == PARAM_TYPE_S32) { - *param->addr.s32p = val; - } else { - dev_err(model->kbdev->dev, - "Wrong type for %s parameter %s\n", - model->ops->name, param->name); - } - break; - } - } - - mutex_unlock(&model->kbdev->ipa.lock); -} -KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32); - -void kbase_ipa_debugfs_init(struct kbase_device *kbdev) -{ - mutex_lock(&kbdev->ipa.lock); - - if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) - kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); - kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); - - debugfs_create_file("ipa_current_power", 0444, - kbdev->mali_debugfs_directory, kbdev, ¤t_power); - debugfs_create_file("ipa_force_fallback_model", 0644, - kbdev->mali_debugfs_directory, kbdev, &force_fallback_model); - - mutex_unlock(&kbdev->ipa.lock); -} diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_debugfs.h b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_debugfs.h deleted file mode 100755 index a983d9c14216..000000000000 --- 
a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_debugfs.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_IPA_DEBUGFS_H_ -#define _KBASE_IPA_DEBUGFS_H_ - -enum kbase_ipa_model_param_type { - PARAM_TYPE_S32 = 1, - PARAM_TYPE_STRING, -}; - -#ifdef CONFIG_DEBUG_FS - -void kbase_ipa_debugfs_init(struct kbase_device *kbdev); -int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, - void *addr, size_t size, - enum kbase_ipa_model_param_type type); -void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); - -/** - * kbase_ipa_model_param_set_s32 - Set an integer model parameter - * - * @model: pointer to IPA model - * @name: name of corresponding debugfs entry - * @val: new value of the parameter - * - * This function is only exposed for use by unit tests running in - * kernel space. Normally it is expected that parameter values will - * instead be set via debugfs. - */ -void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, - const char *name, s32 val); - -#else /* CONFIG_DEBUG_FS */ - -static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, - const char *name, void *addr, - size_t size, - enum kbase_ipa_model_param_type type) -{ - return 0; -} - -static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) -{ } - -#endif /* CONFIG_DEBUG_FS */ - -#endif /* _KBASE_IPA_DEBUGFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_simple.c deleted file mode 100755 index c8399ab22910..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_simple.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#ifdef CONFIG_DEVFREQ_THERMAL -#include -#endif -#include -#include -#include - -#include "mali_kbase.h" -#include "mali_kbase_defs.h" -#include "mali_kbase_ipa_simple.h" -#include "mali_kbase_ipa_debugfs.h" - -#if MALI_UNIT_TEST - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) -static unsigned long dummy_temp; - -static int kbase_simple_power_model_get_dummy_temp( - struct thermal_zone_device *tz, - unsigned long *temp) -{ - *temp = READ_ONCE(dummy_temp); - return 0; -} - -#else -static int dummy_temp; - -static int kbase_simple_power_model_get_dummy_temp( - struct thermal_zone_device *tz, - int *temp) -{ - *temp = READ_ONCE(dummy_temp); - return 0; -} -#endif - -/* Intercept calls to the kernel function using a macro */ -#ifdef thermal_zone_get_temp -#undef thermal_zone_get_temp -#endif -#define thermal_zone_get_temp(tz, temp) \ - kbase_simple_power_model_get_dummy_temp(tz, temp) - -void kbase_simple_power_model_set_dummy_temp(int temp) -{ - WRITE_ONCE(dummy_temp, temp); -} -KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp); - -#endif /* MALI_UNIT_TEST */ - -/* - * This model is primarily designed for the Juno platform. It may not be - * suitable for other platforms. The additional resources in this model - * should preferably be minimal, as this model is rarely used when a dynamic - * model is available. - */ - -/** - * struct kbase_ipa_model_simple_data - IPA context per device - * @dynamic_coefficient: dynamic coefficient of the model - * @static_coefficient: static coefficient of the model - * @ts: Thermal scaling coefficients of the model - * @tz_name: Thermal zone name - * @gpu_tz: thermal zone device - * @poll_temperature_thread: Handle for temperature polling thread - * @current_temperature: Most recent value of polled temperature - * @temperature_poll_interval_ms: How often temperature should be checked, in ms - */ - -struct kbase_ipa_model_simple_data { - u32 dynamic_coefficient; - u32 static_coefficient; - s32 ts[4]; - char tz_name[THERMAL_NAME_LENGTH]; - struct thermal_zone_device *gpu_tz; - struct task_struct *poll_temperature_thread; - int current_temperature; - int temperature_poll_interval_ms; -}; -#define FALLBACK_STATIC_TEMPERATURE 55000 - -/** - * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient - * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N - * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 - * - * Scale the temperature according to a cubic polynomial whose coefficients are - * provided in the device tree. The result is used to scale the static power - * coefficient, where 1000000 means no change. - * - * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000. - */ -static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) -{ - /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ - const s64 t2 = div_s64((t * t), 1000); - - /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ - const s64 t3 = div_s64((t * t2), 1000); - - /* - * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in - * Deg^-N, so we need to multiply the last coefficient by 1000. 
- * Range: -2^63 < res_big < 2^63 - */ - const s64 res_big = ts[3] * t3 /* +/- 2^62 */ - + ts[2] * t2 /* +/- 2^55 */ - + ts[1] * t /* +/- 2^48 */ - + ts[0] * 1000; /* +/- 2^41 */ - - /* Range: -2^60 < res_unclamped < 2^60 */ - s64 res_unclamped = div_s64(res_big, 1000); - - /* Clamp to range of 0x to 10x the static power */ - return clamp(res_unclamped, (s64) 0, (s64) 10000000); -} - -/* We can't call thermal_zone_get_temp() directly in model_static_coeff(), - * because we don't know if tz->lock is held in the same thread. So poll it in - * a separate thread to get around this. */ -static int poll_temperature(void *data) -{ - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *) data; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) - unsigned long temp; -#else - int temp; -#endif - - while (!kthread_should_stop()) { - struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); - - if (tz) { - int ret; - - ret = thermal_zone_get_temp(tz, &temp); - if (ret) { - pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", - ret); - temp = FALLBACK_STATIC_TEMPERATURE; - } - } else { - temp = FALLBACK_STATIC_TEMPERATURE; - } - - WRITE_ONCE(model_data->current_temperature, temp); - - msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms)); - } - - return 0; -} - -static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) -{ - u32 temp_scaling_factor; - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *) model->model_data; - u64 coeff_big; - int temp; - - temp = READ_ONCE(model_data->current_temperature); - - /* Range: 0 <= temp_scaling_factor < 2^24 */ - temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, - temp); - - /* - * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This - * means static_coefficient must be in range - * 0 <= static_coefficient < 2^28. 
- */ - coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor; - *coeffp = div_u64(coeff_big, 1000000); - - return 0; -} - -static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) -{ - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *) model->model_data; - - *coeffp = model_data->dynamic_coefficient; - - return 0; -} - -static int add_params(struct kbase_ipa_model *model) -{ - int err = 0; - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *)model->model_data; - - err = kbase_ipa_model_add_param_s32(model, "static-coefficient", - &model_data->static_coefficient, - 1, true); - if (err) - goto end; - - err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", - &model_data->dynamic_coefficient, - 1, true); - if (err) - goto end; - - err = kbase_ipa_model_add_param_s32(model, "ts", - model_data->ts, 4, true); - if (err) - goto end; - - err = kbase_ipa_model_add_param_string(model, "thermal-zone", - model_data->tz_name, - sizeof(model_data->tz_name), true); - if (err) - goto end; - - model_data->temperature_poll_interval_ms = 200; - err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms", - &model_data->temperature_poll_interval_ms, - 1, false); - -end: - return err; -} - -static int kbase_simple_power_model_init(struct kbase_ipa_model *model) -{ - int err; - struct kbase_ipa_model_simple_data *model_data; - - model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), - GFP_KERNEL); - if (!model_data) - return -ENOMEM; - - model->model_data = (void *) model_data; - - model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE; - model_data->poll_temperature_thread = kthread_run(poll_temperature, - (void *) model_data, - "mali-simple-power-model-temp-poll"); - if (IS_ERR(model_data->poll_temperature_thread)) { - err = PTR_ERR(model_data->poll_temperature_thread); - kfree(model_data); - return err; - } - - err = add_params(model); - if (err) { - kbase_ipa_model_param_free_all(model); - kthread_stop(model_data->poll_temperature_thread); - kfree(model_data); - } - - return err; -} - -static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) -{ - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *)model->model_data; - struct thermal_zone_device *tz; - - lockdep_assert_held(&model->kbdev->ipa.lock); - - if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { - model_data->gpu_tz = NULL; - } else { - char tz_name[THERMAL_NAME_LENGTH]; - - strlcpy(tz_name, model_data->tz_name, sizeof(tz_name)); - - /* Release ipa.lock so that thermal_list_lock is not acquired - * with ipa.lock held, thereby avoid lock ordering violation - * lockdep warning. The warning comes as a chain of locks - * ipa.lock --> thermal_list_lock --> tz->lock gets formed - * on registering devfreq cooling device when probe method - * of mali platform driver is invoked. - */ - mutex_unlock(&model->kbdev->ipa.lock); - tz = thermal_zone_get_zone_by_name(tz_name); - mutex_lock(&model->kbdev->ipa.lock); - - if (IS_ERR_OR_NULL(tz)) { - pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n", - PTR_ERR(tz), tz_name); - return -EPROBE_DEFER; - } - - /* Check if another thread raced against us & updated the - * thermal zone name string. 
Update the gpu_tz pointer only if - * the name string did not change whilst we retrieved the new - * thermal_zone_device pointer, otherwise model_data->tz_name & - * model_data->gpu_tz would become inconsistent with each other. - * The below check will succeed only for the thread which last - * updated the name string. - */ - if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0) - model_data->gpu_tz = tz; - } - - return 0; -} - -static void kbase_simple_power_model_term(struct kbase_ipa_model *model) -{ - struct kbase_ipa_model_simple_data *model_data = - (struct kbase_ipa_model_simple_data *)model->model_data; - - kthread_stop(model_data->poll_temperature_thread); - - kfree(model_data); -} - -struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { - .name = "mali-simple-power-model", - .init = &kbase_simple_power_model_init, - .recalculate = &kbase_simple_power_model_recalculate, - .term = &kbase_simple_power_model_term, - .get_dynamic_coeff = &model_dynamic_coeff, - .get_static_coeff = &model_static_coeff, -}; -KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops); diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_simple.h b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_simple.h deleted file mode 100755 index fed67d527c7c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_simple.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_IPA_SIMPLE_H_ -#define _KBASE_IPA_SIMPLE_H_ - -#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) - -extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; - -#if MALI_UNIT_TEST -/** - * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value - * @temp: Temperature of the thermal zone, in millidegrees celsius. - * - * This is only intended for use in unit tests, to ensure that the temperature - * values used by the simple power model are predictable. Deterministic - * behavior is necessary to allow validation of the static power values - * computed by this model. - */ -void kbase_simple_power_model_set_dummy_temp(int temp); -#endif /* MALI_UNIT_TEST */ - -#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ - -#endif /* _KBASE_IPA_SIMPLE_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_common.c deleted file mode 100755 index 1a6ba0152eb0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_common.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_ipa_vinstr_common.h" -#include "mali_kbase_ipa_debugfs.h" - -#define DEFAULT_SCALING_FACTOR 5 - -/* If the value of GPU_ACTIVE is below this, use the simple model - * instead, to avoid extrapolating small amounts of counter data across - * large sample periods. - */ -#define DEFAULT_MIN_SAMPLE_CYCLES 10000 - -/** - * read_hwcnt() - read a counter value - * @model_data: pointer to model data - * @offset: offset, in bytes, into vinstr buffer - * - * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be - * incrementing every cycle over a ~100ms sample period at a high frequency, - * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27. - */ -static inline u32 kbase_ipa_read_hwcnt( - struct kbase_ipa_model_vinstr_data *model_data, - u32 offset) -{ - u8 *p = (u8 *)model_data->dump_buf.dump_buf; - - return *(u32 *)&p[offset]; -} - -static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) -{ - if (S64_MAX - a < b) - return S64_MAX; - return a + b; -} - -s64 kbase_ipa_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter) -{ - struct kbase_device *kbdev = model_data->kbdev; - u64 core_mask; - u32 base = 0; - s64 ret = 0; - - core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; - while (core_mask != 0ull) { - if ((core_mask & 1ull) != 0ull) { - /* 0 < counter_value < 2^27 */ - u32 counter_value = kbase_ipa_read_hwcnt(model_data, - base + counter); - - /* 0 < ret < 2^27 * max_num_cores = 2^32 */ - ret = kbase_ipa_add_saturate(ret, counter_value); - } - base += KBASE_IPA_NR_BYTES_PER_BLOCK; - core_mask >>= 1; - } - - /* Range: -2^54 < ret * coeff < 2^54 */ - return ret * coeff; -} - -s64 kbase_ipa_sum_all_memsys_blocks( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter) -{ - struct kbase_device *kbdev = model_data->kbdev; - const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices; - u32 base = 0; - s64 ret = 0; - u32 i; - - for (i = 0; i < num_blocks; i++) { - /* 0 < counter_value < 2^27 */ - u32 counter_value = kbase_ipa_read_hwcnt(model_data, - base + counter); - - /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ - ret = kbase_ipa_add_saturate(ret, counter_value); - base += KBASE_IPA_NR_BYTES_PER_BLOCK; - } - - /* Range: -2^51 < ret * coeff < 2^51 */ - return ret * coeff; -} - -s64 kbase_ipa_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter) -{ - /* Range: 0 < counter_value < 2^27 */ - const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); - - /* Range: -2^49 < ret < 2^49 */ - return counter_value * (s64) coeff; -} - -int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) -{ - int errcode; - struct kbase_device *kbdev = model_data->kbdev; - struct 
kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; - struct kbase_hwcnt_enable_map enable_map; - const struct kbase_hwcnt_metadata *metadata = - kbase_hwcnt_virtualizer_metadata(hvirt); - - if (!metadata) - return -1; - - errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); - if (errcode) { - dev_err(kbdev->dev, "Failed to allocate IPA enable map"); - return errcode; - } - - kbase_hwcnt_enable_map_enable_all(&enable_map); - - errcode = kbase_hwcnt_virtualizer_client_create( - hvirt, &enable_map, &model_data->hvirt_cli); - kbase_hwcnt_enable_map_free(&enable_map); - if (errcode) { - dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); - model_data->hvirt_cli = NULL; - return errcode; - } - - errcode = kbase_hwcnt_dump_buffer_alloc( - metadata, &model_data->dump_buf); - if (errcode) { - dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); - kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); - model_data->hvirt_cli = NULL; - return errcode; - } - - return 0; -} - -void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) -{ - if (model_data->hvirt_cli) { - kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); - kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); - model_data->hvirt_cli = NULL; - } -} - -int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) -{ - struct kbase_ipa_model_vinstr_data *model_data = - (struct kbase_ipa_model_vinstr_data *)model->model_data; - s64 energy = 0; - size_t i; - u64 coeff = 0, coeff_mul = 0; - u64 start_ts_ns, end_ts_ns; - u32 active_cycles; - int err = 0; - - err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, - &start_ts_ns, &end_ts_ns, &model_data->dump_buf); - if (err) - goto err0; - - /* Range: 0 (GPU not used at all), to the max sampling interval, say - * 1s, * max GPU frequency (GPU 100% utilized). - * 0 <= active_cycles <= 1 * ~2GHz - * 0 <= active_cycles < 2^31 - */ - active_cycles = model_data->get_active_cycles(model_data); - - if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) { - err = -ENODATA; - goto err0; - } - - /* Range: 1 <= active_cycles < 2^31 */ - active_cycles = max(1u, active_cycles); - - /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around - * -2^57 < energy < 2^57 - */ - for (i = 0; i < model_data->groups_def_num; i++) { - const struct kbase_ipa_group *group = &model_data->groups_def[i]; - s32 coeff = model_data->group_values[i]; - s64 group_energy = group->op(model_data, coeff, - group->counter_block_offset); - - energy = kbase_ipa_add_saturate(energy, group_energy); - } - - /* Range: 0 <= coeff < 2^57 */ - if (energy > 0) - coeff = energy; - - /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this - * can be constrained further: Counter values can only be increased by - * a theoretical maximum of about 64k per clock cycle. Beyond this, - * we'd have to sample every 1ms to avoid them overflowing at the - * lowest clock frequency (say 100MHz). 
Therefore, we can write the - * range of 'coeff' in terms of active_cycles: - * - * coeff = SUM(coeffN * counterN * num_cores_for_counterN) - * coeff <= SUM(coeffN * counterN) * max_num_cores - * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores - * (substitute max_counter = 2^16 * active_cycles) - * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores - * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5 - * coeff <= 2^46 * active_cycles - * - * So after the division: 0 <= coeff <= 2^46 - */ - coeff = div_u64(coeff, active_cycles); - - /* Not all models were derived at the same reference voltage. Voltage - * scaling is done by multiplying by V^2, so we need to *divide* by - * Vref^2 here. - * Range: 0 <= coeff <= 2^49 - */ - coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); - /* Range: 0 <= coeff <= 2^52 */ - coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); - - /* Scale by user-specified integer factor. - * Range: 0 <= coeff_mul < 2^57 - */ - coeff_mul = coeff * model_data->scaling_factor; - - /* The power models have results with units - * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this - * becomes fW/(Hz V^2), which are the units of coeff_mul. However, - * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide - * by 1000. - * Range: 0 <= coeff_mul < 2^47 - */ - coeff_mul = div_u64(coeff_mul, 1000u); - -err0: - /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ - *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16); - return err; -} - -int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, - const struct kbase_ipa_group *ipa_groups_def, - size_t ipa_group_size, - kbase_ipa_get_active_cycles_callback get_active_cycles, - s32 reference_voltage) -{ - int err = 0; - size_t i; - struct kbase_ipa_model_vinstr_data *model_data; - - if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles) - return -EINVAL; - - model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); - if (!model_data) - return -ENOMEM; - - model_data->kbdev = model->kbdev; - model_data->groups_def = ipa_groups_def; - model_data->groups_def_num = ipa_group_size; - model_data->get_active_cycles = get_active_cycles; - - model->model_data = (void *) model_data; - - for (i = 0; i < model_data->groups_def_num; ++i) { - const struct kbase_ipa_group *group = &model_data->groups_def[i]; - - model_data->group_values[i] = group->default_value; - err = kbase_ipa_model_add_param_s32(model, group->name, - &model_data->group_values[i], - 1, false); - if (err) - goto exit; - } - - model_data->scaling_factor = DEFAULT_SCALING_FACTOR; - err = kbase_ipa_model_add_param_s32(model, "scale", - &model_data->scaling_factor, - 1, false); - if (err) - goto exit; - - model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; - err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", - &model_data->min_sample_cycles, - 1, false); - if (err) - goto exit; - - model_data->reference_voltage = reference_voltage; - err = kbase_ipa_model_add_param_s32(model, "reference_voltage", - &model_data->reference_voltage, - 1, false); - if (err) - goto exit; - - err = kbase_ipa_attach_vinstr(model_data); - -exit: - if (err) { - kbase_ipa_model_param_free_all(model); - kfree(model_data); - } - return err; -} - -void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) -{ - struct kbase_ipa_model_vinstr_data *model_data = - (struct kbase_ipa_model_vinstr_data *)model->model_data; - - 
kbase_ipa_detach_vinstr(model_data); - kfree(model_data); -} diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_common.h deleted file mode 100755 index 46e3cd4bc6e1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_common.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_IPA_VINSTR_COMMON_H_ -#define _KBASE_IPA_VINSTR_COMMON_H_ - -#include "mali_kbase.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_types.h" - -/* Maximum number of IPA groups for an IPA model. */ -#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 - -/* Number of bytes per hardware counter in a vinstr_buffer. */ -#define KBASE_IPA_NR_BYTES_PER_CNT 4 - -/* Number of hardware counters per block in a vinstr_buffer. */ -#define KBASE_IPA_NR_CNT_PER_BLOCK 64 - -/* Number of bytes per block in a vinstr_buffer. */ -#define KBASE_IPA_NR_BYTES_PER_BLOCK \ - (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) - -struct kbase_ipa_model_vinstr_data; - -typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *); - -/** - * struct kbase_ipa_model_vinstr_data - IPA context per device - * @kbdev: pointer to kbase device - * @groups_def: Array of IPA groups. - * @groups_def_num: Number of elements in the array of IPA groups. - * @get_active_cycles: Callback to return number of active cycles during - * counter sample period - * @hvirt_cli: hardware counter virtualizer client handle - * @dump_buf: buffer to dump hardware counters onto - * @reference_voltage: voltage, in mV, of the operating point used when - * deriving the power model coefficients. Range approx - * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 - * @scaling_factor: User-specified power scaling factor. This is an - * integer, which is multiplied by the power coefficient - * just before OPP scaling. - * Range approx 0-32: 0 < scaling_factor < 2^5 - * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of - * cycles the GPU was working) is less than - * min_sample_cycles, the counter model will return an - * error, causing the IPA framework to approximate using - * the cached simple model results instead. This may be - * more accurate than extrapolating using a very small - * counter dump. 
- */ -struct kbase_ipa_model_vinstr_data { - struct kbase_device *kbdev; - s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM]; - const struct kbase_ipa_group *groups_def; - size_t groups_def_num; - kbase_ipa_get_active_cycles_callback get_active_cycles; - struct kbase_hwcnt_virtualizer_client *hvirt_cli; - struct kbase_hwcnt_dump_buffer dump_buf; - s32 reference_voltage; - s32 scaling_factor; - s32 min_sample_cycles; -}; - -/** - * struct ipa_group - represents a single IPA group - * @name: name of the IPA group - * @default_value: default value of coefficient for IPA group. - * Coefficients are interpreted as fractions where the - * denominator is 1000000. - * @op: which operation to be performed on the counter values - * @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group - */ -struct kbase_ipa_group { - const char *name; - s32 default_value; - s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32); - u32 counter_block_offset; -}; - -/** - * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores - * @model_data: pointer to model data - * @coeff: model coefficient. Unity is ~2^20, so range approx - * +/- 4.0: -2^22 < coeff < 2^22 - * @counter offset in bytes of the counter used to calculate energy - * for IPA group - * - * Calculate energy estimation based on hardware counter `counter' - * across all shader cores. - * - * Return: Sum of counter values. Range: -2^54 < ret < 2^54 - */ -s64 kbase_ipa_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter); - -/** - * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks - * @model_data: pointer to model data - * @coeff: model coefficient. Unity is ~2^20, so range approx - * +/- 4.0: -2^22 < coeff < 2^22 - * @counter: offset in bytes of the counter used to calculate energy - * for IPA group - * - * Calculate energy estimation based on hardware counter `counter' across all - * memory system blocks. - * - * Return: Sum of counter values. Range: -2^51 < ret < 2^51 - */ -s64 kbase_ipa_sum_all_memsys_blocks( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter); - -/** - * kbase_ipa_single_counter() - sum a single counter - * @model_data: pointer to model data - * @coeff: model coefficient. Unity is ~2^20, so range approx - * +/- 4.0: -2^22 < coeff < 2^22 - * @counter: offset in bytes of the counter used to calculate energy - * for IPA group - * - * Calculate energy estimation based on hardware counter `counter'. - * - * Return: Counter value. Range: -2^49 < ret < 2^49 - */ -s64 kbase_ipa_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, u32 counter); - -/** - * attach_vinstr() - attach a vinstr_buffer to an IPA model. - * @model_data pointer to model data - * - * Attach a vinstr_buffer to an IPA model. The vinstr_buffer - * allows access to the hardware counters used to calculate - * energy consumption. - * - * Return: 0 on success, or an error code. - */ -int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); - -/** - * detach_vinstr() - detach a vinstr_buffer from an IPA model. - * @model_data pointer to model data - * - * Detach a vinstr_buffer from an IPA model. 
- */ -void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); - -/** - * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters - * @model: pointer to instantiated model - * @coeffp: pointer to location where calculated power, in - * pW/(Hz V^2), is stored. - * - * This is a GPU-agnostic implementation of the get_dynamic_coeff() - * function of an IPA model. It relies on the model being populated - * with GPU-specific attributes at initialization time. - * - * Return: 0 on success, or an error code. - */ -int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); - -/** - * kbase_ipa_vinstr_common_model_init() - initialize ipa power model - * @model: ipa power model to initialize - * @ipa_groups_def: array of ipa groups which sets coefficients for - * the corresponding counters used in the ipa model - * @ipa_group_size: number of elements in the array @ipa_groups_def - * @get_active_cycles: callback to return the number of cycles the GPU was - * active during the counter sample period. - * @reference_voltage: voltage, in mV, of the operating point used when - * deriving the power model coefficients. - * - * This initialization function performs initialization steps common - * for ipa models based on counter values. In each call, the model - * passes its specific coefficient values per ipa counter group via - * @ipa_groups_def array. - * - * Return: 0 on success, error code otherwise - */ -int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, - const struct kbase_ipa_group *ipa_groups_def, - size_t ipa_group_size, - kbase_ipa_get_active_cycles_callback get_active_cycles, - s32 reference_voltage); - -/** - * kbase_ipa_vinstr_common_model_term() - terminate ipa power model - * @model: ipa power model to terminate - * - * This function performs all necessary steps to terminate ipa power model - * including clean up of resources allocated to hold model data. - */ -void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); - -#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c deleted file mode 100755 index 6365d2f2d9f6..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include - -#include "mali_kbase_ipa_vinstr_common.h" -#include "mali_kbase.h" -#include "mali_kbase_ipa_debugfs.h" - - -/* Performance counter blocks base offsets */ -#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) -#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) - -/* JM counter block offsets */ -#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) - -/* Tiler counter block offsets */ -#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45) - -/* MEMSYS counter block offsets */ -#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) - -/* SC counter block offsets */ -#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4) -#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26) -#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) -#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) -#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) -#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) -#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50) -#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51) -#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56) -#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) -#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) - -/** - * get_jm_counter() - get performance counter offset inside the Job Manager block - * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block. - * - * Return: Block offset in bytes of the required performance counter. - */ -static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, - u32 counter_block_offset) -{ - return JM_BASE + counter_block_offset; -} - -/** - * get_memsys_counter() - get performance counter offset inside the Memory System block - * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block. - * - * Return: Block offset in bytes of the required performance counter. - */ -static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, - u32 counter_block_offset) -{ - /* The base address of Memory System performance counters is always the same, although their number - * may vary based on the number of cores. For the moment it's ok to return a constant. - */ - return MEMSYS_BASE + counter_block_offset; -} - -/** - * get_sc_counter() - get performance counter offset inside the Shader Cores block - * @model_data: pointer to GPU model data. - * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block. - * - * Return: Block offset in bytes of the required performance counter. - */ -static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, - u32 counter_block_offset) -{ - const u32 sc_base = MEMSYS_BASE + - (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * - KBASE_IPA_NR_BYTES_PER_BLOCK); - - return sc_base + counter_block_offset; -} - -/** - * memsys_single_counter() - calculate energy for a single Memory System performance counter. - * @model_data: pointer to GPU model data. - * @coeff: default value of coefficient for IPA group. - * @offset: offset in bytes of the counter inside the block it belongs to. - * - * Return: Energy estimation for a single Memory System performance counter. 
- */ -static s64 kbase_g7x_sum_all_memsys_blocks( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 offset) -{ - u32 counter; - - counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset); - return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); -} - -/** - * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores. - * @model_data: pointer to GPU model data. - * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. - * - * Return: Energy estimation for a Shader Cores performance counter for all cores. - */ -static s64 kbase_g7x_sum_all_shader_cores( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) -{ - u32 counter; - - counter = kbase_g7x_power_model_get_sc_counter(model_data, - counter_block_offset); - return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); -} - -/** - * jm_single_counter() - calculate energy for a single Job Manager performance counter. - * @model_data: pointer to GPU model data. - * @coeff: default value of coefficient for IPA group. - * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. - * - * Return: Energy estimation for a single Job Manager performance counter. - */ -static s64 kbase_g7x_jm_single_counter( - struct kbase_ipa_model_vinstr_data *model_data, - s32 coeff, - u32 counter_block_offset) -{ - u32 counter; - - counter = kbase_g7x_power_model_get_jm_counter(model_data, - counter_block_offset); - return kbase_ipa_single_counter(model_data, coeff, counter); -} - -/** - * get_active_cycles() - return the GPU_ACTIVE counter - * @model_data: pointer to GPU model data. - * - * Return: the number of cycles the GPU was active during the counter sampling - * period. - */ -static u32 kbase_g7x_get_active_cycles( - struct kbase_ipa_model_vinstr_data *model_data) -{ - u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE); - - /* Counters are only 32-bit, so we can safely multiply by 1 then cast - * the 64-bit result back to a u32. - */ - return kbase_ipa_single_counter(model_data, 1, counter); -} - -/** Table of IPA group definitions. - * - * For each IPA group, this table defines a function to access the given performance block counter (or counters, - * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. 
- */ - -static const struct kbase_ipa_group ipa_groups_def_g71[] = { - { - .name = "l2_access", - .default_value = 526300, - .op = kbase_g7x_sum_all_memsys_blocks, - .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, - }, - { - .name = "exec_instr_count", - .default_value = 301100, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_EXEC_INSTR_COUNT, - }, - { - .name = "tex_issue", - .default_value = 197400, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_TEX_COORD_ISSUE, - }, - { - .name = "tile_wb", - .default_value = -156400, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_BEATS_WR_TIB, - }, - { - .name = "gpu_active", - .default_value = 115800, - .op = kbase_g7x_jm_single_counter, - .counter_block_offset = JM_GPU_ACTIVE, - }, -}; - -static const struct kbase_ipa_group ipa_groups_def_g72[] = { - { - .name = "l2_access", - .default_value = 393000, - .op = kbase_g7x_sum_all_memsys_blocks, - .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, - }, - { - .name = "exec_instr_count", - .default_value = 227000, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_EXEC_INSTR_COUNT, - }, - { - .name = "tex_issue", - .default_value = 181900, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_TEX_COORD_ISSUE, - }, - { - .name = "tile_wb", - .default_value = -120200, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_BEATS_WR_TIB, - }, - { - .name = "gpu_active", - .default_value = 133100, - .op = kbase_g7x_jm_single_counter, - .counter_block_offset = JM_GPU_ACTIVE, - }, -}; - -static const struct kbase_ipa_group ipa_groups_def_g76[] = { - { - .name = "gpu_active", - .default_value = 122000, - .op = kbase_g7x_jm_single_counter, - .counter_block_offset = JM_GPU_ACTIVE, - }, - { - .name = "exec_instr_count", - .default_value = 488900, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_EXEC_INSTR_COUNT, - }, - { - .name = "vary_instr", - .default_value = 212100, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_VARY_INSTR, - }, - { - .name = "tex_tfch_num_operations", - .default_value = 288000, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, - }, - { - .name = "l2_access", - .default_value = 378100, - .op = kbase_g7x_sum_all_memsys_blocks, - .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, - }, -}; - -static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { - { - .name = "gpu_active", - .default_value = 224200, - .op = kbase_g7x_jm_single_counter, - .counter_block_offset = JM_GPU_ACTIVE, - }, - { - .name = "exec_instr_count", - .default_value = 384700, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_EXEC_INSTR_COUNT, - }, - { - .name = "vary_instr", - .default_value = 271900, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_VARY_INSTR, - }, - { - .name = "tex_tfch_num_operations", - .default_value = 477700, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, - }, - { - .name = "l2_access", - .default_value = 551400, - .op = kbase_g7x_sum_all_memsys_blocks, - .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, - }, -}; - -static const struct kbase_ipa_group ipa_groups_def_g51[] = { - { - .name = "gpu_active", - .default_value = 201400, - .op = kbase_g7x_jm_single_counter, - .counter_block_offset = JM_GPU_ACTIVE, - }, - { - .name = "exec_instr_count", - .default_value = 392700, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = 
SC_EXEC_INSTR_COUNT, - }, - { - .name = "vary_instr", - .default_value = 274000, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_VARY_INSTR, - }, - { - .name = "tex_tfch_num_operations", - .default_value = 528000, - .op = kbase_g7x_sum_all_shader_cores, - .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, - }, - { - .name = "l2_access", - .default_value = 506400, - .op = kbase_g7x_sum_all_memsys_blocks, - .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, - }, -}; - -#define IPA_POWER_MODEL_OPS(gpu, init_token) \ - const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ - .name = "mali-" #gpu "-power-model", \ - .init = kbase_ ## init_token ## _power_model_init, \ - .term = kbase_ipa_vinstr_common_model_term, \ - .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ - }; \ - KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) - -#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ - static int kbase_ ## gpu ## _power_model_init(\ - struct kbase_ipa_model *model) \ - { \ - BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ - KBASE_IPA_MAX_GROUP_DEF_NUM); \ - return kbase_ipa_vinstr_common_model_init(model, \ - ipa_groups_def_ ## gpu, \ - ARRAY_SIZE(ipa_groups_def_ ## gpu), \ - kbase_g7x_get_active_cycles, \ - (reference_voltage)); \ - } \ - IPA_POWER_MODEL_OPS(gpu, gpu) - -#define ALIAS_POWER_MODEL(gpu, as_gpu) \ - IPA_POWER_MODEL_OPS(gpu, as_gpu) - -STANDARD_POWER_MODEL(g71, 800); -STANDARD_POWER_MODEL(g72, 800); -STANDARD_POWER_MODEL(g76, 800); -STANDARD_POWER_MODEL(g52_r1, 1000); -STANDARD_POWER_MODEL(g51, 1000); - -/* g52 is an alias of g76 (TNOX) for IPA */ -ALIAS_POWER_MODEL(g52, g76); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_base_hwconfig_features.h b/drivers/gpu/drm/bifrost/midgard/mali_base_hwconfig_features.h deleted file mode 100755 index 5571f846c23e..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_base_hwconfig_features.h +++ /dev/null @@ -1,614 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, - * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README - */ - -#ifndef _BASE_HWCONFIG_FEATURES_H_ -#define _BASE_HWCONFIG_FEATURES_H_ - -enum base_hw_feature { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, - BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_V4, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_generic[] = { - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_t60x[] = { - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_V4, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_t62x[] = { - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_V4, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_t72x[] = { - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_V4, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_t76x[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tFxx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - 
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_t83x[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_t82x[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tMIx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tHEx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tSIx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tDVx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tNOx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tGOx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - 
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tKAx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tTRx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tNAx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tBEx[] = { - 
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tULx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tBOx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tIDx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - 
BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tVAx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_END -}; - -static const enum base_hw_feature base_hw_features_tEGx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_THREAD_GROUP_SPLIT, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_TLS_HASHING, - BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, - BASE_HW_FEATURE_END -}; - -#endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/drm/bifrost/midgard/mali_base_hwconfig_issues.h deleted file mode 100755 index d7c40ef9e9c7..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_base_hwconfig_issues.h +++ /dev/null @@ -1,1360 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, - * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README - */ - -#ifndef _BASE_HWCONFIG_ISSUES_H_ -#define _BASE_HWCONFIG_ISSUES_H_ - -enum base_hw_issue { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_6367, - BASE_HW_ISSUE_6398, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_6787, - BASE_HW_ISSUE_7027, - BASE_HW_ISSUE_7144, - BASE_HW_ISSUE_7304, - BASE_HW_ISSUE_8073, - BASE_HW_ISSUE_8186, - BASE_HW_ISSUE_8215, - BASE_HW_ISSUE_8245, - BASE_HW_ISSUE_8250, - BASE_HW_ISSUE_8260, - BASE_HW_ISSUE_8280, - BASE_HW_ISSUE_8316, - BASE_HW_ISSUE_8381, - BASE_HW_ISSUE_8394, - BASE_HW_ISSUE_8401, - BASE_HW_ISSUE_8408, - BASE_HW_ISSUE_8443, - BASE_HW_ISSUE_8456, - BASE_HW_ISSUE_8564, - BASE_HW_ISSUE_8634, - BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_8791, - BASE_HW_ISSUE_8833, - BASE_HW_ISSUE_8879, - BASE_HW_ISSUE_8896, - BASE_HW_ISSUE_8975, - BASE_HW_ISSUE_8986, - BASE_HW_ISSUE_8987, - BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9418, - BASE_HW_ISSUE_9423, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_9510, - BASE_HW_ISSUE_9566, - BASE_HW_ISSUE_9630, - BASE_HW_ISSUE_10127, - BASE_HW_ISSUE_10327, - BASE_HW_ISSUE_10410, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10487, - BASE_HW_ISSUE_10607, - BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10676, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10797, - BASE_HW_ISSUE_10817, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_10959, - BASE_HW_ISSUE_10969, - BASE_HW_ISSUE_10984, - BASE_HW_ISSUE_10995, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11035, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_26, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3542, - BASE_HW_ISSUE_T76X_3556, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - GPUCORE_1619, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TNOX_1194, - BASE_HW_ISSUE_TGOX_R1_1234, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_generic[] = { - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { - BASE_HW_ISSUE_6367, - BASE_HW_ISSUE_6398, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_6787, - BASE_HW_ISSUE_7027, - BASE_HW_ISSUE_7144, - BASE_HW_ISSUE_7304, - BASE_HW_ISSUE_8073, - BASE_HW_ISSUE_8186, - BASE_HW_ISSUE_8215, - BASE_HW_ISSUE_8245, - BASE_HW_ISSUE_8250, - BASE_HW_ISSUE_8260, - BASE_HW_ISSUE_8280, - BASE_HW_ISSUE_8316, - BASE_HW_ISSUE_8381, - BASE_HW_ISSUE_8394, - BASE_HW_ISSUE_8401, - BASE_HW_ISSUE_8408, - BASE_HW_ISSUE_8443, - BASE_HW_ISSUE_8456, - BASE_HW_ISSUE_8564, - BASE_HW_ISSUE_8634, - BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_8791, - BASE_HW_ISSUE_8833, - BASE_HW_ISSUE_8896, - BASE_HW_ISSUE_8975, - BASE_HW_ISSUE_8986, - BASE_HW_ISSUE_8987, - BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9418, - 
BASE_HW_ISSUE_9423, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_9510, - BASE_HW_ISSUE_9566, - BASE_HW_ISSUE_9630, - BASE_HW_ISSUE_10410, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10487, - BASE_HW_ISSUE_10607, - BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10676, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_10969, - BASE_HW_ISSUE_10984, - BASE_HW_ISSUE_10995, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11035, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_3964, - GPUCORE_1619, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { - BASE_HW_ISSUE_6367, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_6787, - BASE_HW_ISSUE_7027, - BASE_HW_ISSUE_7304, - BASE_HW_ISSUE_8408, - BASE_HW_ISSUE_8564, - BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_8975, - BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9418, - BASE_HW_ISSUE_9423, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_9510, - BASE_HW_ISSUE_10410, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10487, - BASE_HW_ISSUE_10607, - BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10676, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_10969, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11035, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { - BASE_HW_ISSUE_6367, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_6787, - BASE_HW_ISSUE_7027, - BASE_HW_ISSUE_7304, - BASE_HW_ISSUE_8408, - BASE_HW_ISSUE_8564, - BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_8975, - BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_9510, - BASE_HW_ISSUE_10410, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10487, - BASE_HW_ISSUE_10607, - BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10676, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11035, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10127, - BASE_HW_ISSUE_10327, - BASE_HW_ISSUE_10410, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10487, - BASE_HW_ISSUE_10607, - BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10676, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10817, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_10959, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11035, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - 
BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_10959, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_10959, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_26, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3542, - BASE_HW_ISSUE_T76X_3556, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_26, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3542, - BASE_HW_ISSUE_T76X_3556, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_26, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3542, - BASE_HW_ISSUE_T76X_3556, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_26, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3542, - BASE_HW_ISSUE_T76X_3556, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - 
BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_26, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3542, - BASE_HW_ISSUE_T76X_3556, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10797, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10797, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10684, - BASE_HW_ISSUE_10797, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_11056, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t72x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10797, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t76x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - 
BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t60x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t62x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, - BASE_HW_ISSUE_10931, - BASE_HW_ISSUE_11012, - BASE_HW_ISSUE_11020, - BASE_HW_ISSUE_11024, - BASE_HW_ISSUE_11042, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tFRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue 
base_hw_issues_t86x_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t86x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t83x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - GPUCORE_1619, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - 
BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_10883, - BASE_HW_ISSUE_10946, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T720_1386, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_t82x[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3086, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T83X_817, - BASE_HW_ISSUE_TMIX_7891, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8463, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TMIX_8438, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tMIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_TMIX_8456, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { - 
BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tHEx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tSIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tDVx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TNOX_1194, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tNOx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TNOX_1194, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TGOX_R1_1234, - 
BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tGOx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tKAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tTRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tNAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tBEx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tULx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tBOx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tIDx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tIDx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - 
BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tEGx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tEGx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TSIX_1116, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_END -}; - -#endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_base_kernel.h b/drivers/gpu/drm/bifrost/midgard/mali_base_kernel.h deleted file mode 100755 index 70dc3c5d4917..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_base_kernel.h +++ /dev/null @@ -1,1763 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base structures shared with the kernel. - */ - -#ifndef _BASE_KERNEL_H_ -#define _BASE_KERNEL_H_ - -typedef struct base_mem_handle { - struct { - u64 handle; - } basep; -} base_mem_handle; - -#include "mali_base_mem_priv.h" -#include "mali_midg_coherency.h" -#include "mali_kbase_gpu_id.h" - -/* - * Dependency stuff, keep it private for now. May want to expose it if - * we decide to make the number of semaphores a configurable - * option. - */ -#define BASE_JD_ATOM_COUNT 256 - -/* Set/reset values for a software event */ -#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) - -#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 - -#define BASE_MAX_COHERENT_GROUPS 16 - -#if defined CDBG_ASSERT -#define LOCAL_ASSERT CDBG_ASSERT -#elif defined KBASE_DEBUG_ASSERT -#define LOCAL_ASSERT KBASE_DEBUG_ASSERT -#else -#error assert macro not defined! -#endif - -#if defined(PAGE_MASK) && defined(PAGE_SHIFT) -#define LOCAL_PAGE_SHIFT PAGE_SHIFT -#define LOCAL_PAGE_LSB ~PAGE_MASK -#else -#include - -#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 -#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 -#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) -#else -#error Failed to find page size -#endif -#endif - -/** - * @addtogroup base_user_api User-side Base APIs - * @{ - */ - -/** - * @addtogroup base_user_api_memory User-side Base Memory APIs - * @{ - */ - -/** - * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. - * - * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator - * in order to determine the best cache policy. Some combinations are - * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), - * which defines a write-only region on the CPU side, which is - * heavily read by the CPU... - * Other flags are only meaningful to a particular allocator. 
- * More flags can be added to this list, as long as they don't clash - * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). - */ -typedef u32 base_mem_alloc_flags; - -/* Memory allocation, access/hint flags. - * - * See base_mem_alloc_flags. - */ - -/* IN */ -/* Read access CPU side - */ -#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) - -/* Write access CPU side - */ -#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) - -/* Read access GPU side - */ -#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) - -/* Write access GPU side - */ -#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) - -/* Execute allowed on the GPU side - */ -#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) - -/* Will be permanently mapped in kernel space. - * Flag is only allowed on allocations originating from kbase. - */ -#define BASE_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) - -/* The allocation will completely reside within the same 4GB chunk in the GPU - * virtual space. - * Since this flag is primarily required only for the TLS memory which will - * not be used to contain executable code and also not used for Tiler heap, - * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. - */ -#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) - -#define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7) -#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) - -/* Grow backing store on GPU Page Fault - */ -#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) - -/* Page coherence Outer shareable, if available - */ -#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) - -/* Page coherence Inner shareable - */ -#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) - -/* Should be cached on the CPU - */ -#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) - -/* IN/OUT */ -/* Must have same VA on both the GPU and the CPU - */ -#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) - -/* OUT */ -/* Must call mmap to acquire a GPU address for the alloc - */ -#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) - -/* IN */ -/* Page coherence Outer shareable, required. - */ -#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) - -/* Secure memory - */ -#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16) - -/* Not needed physical memory - */ -#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) - -/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the - * addresses to be the same - */ -#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) - -/** - * Bit 19 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) -#define BASE_MEM_MAYBE_RESERVED_BIT_19 BASE_MEM_RESERVED_BIT_19 - -/** - * Memory starting from the end of the initial commit is aligned to 'extent' - * pages, where 'extent' must be a power of 2 and no more than - * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES - */ -#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) - -/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu mode. - * Some components within the GPU might only be able to access memory that is - * GPU cacheable. Refer to the specific GPU implementation for more details. - * The 3 shareability flags will be ignored for GPU uncached memory. 
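/*
 * Illustrative sketch (the helper name is hypothetical, not from this header):
 * one plausible way the allocation flags documented above are ORed together
 * on the user side. Only the base_mem_alloc_flags typedef and flag macros
 * defined in this file are assumed.
 */
static inline base_mem_alloc_flags shared_buffer_flags(void)
{
	/* CPU and GPU read/write access, identical VA on both sides, and
	 * physical backing grown on GPU page fault instead of being fully
	 * committed up front. */
	return BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
	       BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
	       BASE_MEM_SAME_VA | BASE_MEM_GROW_ON_GPF;
}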
- * If used while importing USER_BUFFER type memory, then the import will fail - * if the memory is not aligned to GPU and CPU cache line width. - */ -#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) - -/* Number of bits used as flags for base memory management - * - * Must be kept in sync with the base_mem_alloc_flags flags - */ -#define BASE_MEM_FLAGS_NR_BITS 22 - -/* A mask for all output bits, excluding IN/OUT bits. - */ -#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP - -/* A mask for all input bits, including IN/OUT bits. - */ -#define BASE_MEM_FLAGS_INPUT_MASK \ - (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) - -/* A mask for all the flags which are modifiable via the base_mem_set_flags - * interface. - */ -#define BASE_MEM_FLAGS_MODIFIABLE \ - (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ - BASE_MEM_COHERENT_LOCAL) - - -/* A mask of all currently reserved flags - */ -#define BASE_MEM_FLAGS_RESERVED \ - (BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \ - BASE_MEM_MAYBE_RESERVED_BIT_19) - -/* A mask of all the flags which are only valid for allocations within kbase, - * and may not be passed from user space. - */ -#define BASE_MEM_FLAGS_KERNEL_ONLY (BASE_MEM_PERMANENT_KERNEL_MAPPING) - -/* A mask of all the flags that can be returned via the base_mem_get_flags() - * interface. - */ -#define BASE_MEM_FLAGS_QUERYABLE \ - (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ - BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ - BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ - BASE_MEM_FLAGS_KERNEL_ONLY)) - -/** - * enum base_mem_import_type - Memory types supported by @a base_mem_import - * - * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type - * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) - * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a - * base_mem_import_user_buffer - * - * Each type defines what the supported handle type is. - * - * If any new type is added here ARM must be contacted - * to allocate a numeric value for it. - * Do not just add a new type without synchronizing with ARM - * as future releases from ARM might include other new types - * which could clash with your custom types. - */ -typedef enum base_mem_import_type { - BASE_MEM_IMPORT_TYPE_INVALID = 0, - /** - * Import type with value 1 is deprecated. - */ - BASE_MEM_IMPORT_TYPE_UMM = 2, - BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 -} base_mem_import_type; - -/** - * struct base_mem_import_user_buffer - Handle of an imported user buffer - * - * @ptr: address of imported user buffer - * @length: length of imported user buffer in bytes - * - * This structure is used to represent a handle of an imported user buffer. - */ - -struct base_mem_import_user_buffer { - u64 ptr; - u64 length; -}; - -/** - * @brief Invalid memory handle. - * - * Return value from functions returning @ref base_mem_handle on error. - * - * @warning @ref base_mem_handle_new_invalid must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. - */ -#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) - -/** - * @brief Special write-alloc memory handle. - * - * A special handle is used to represent a region where a special page is mapped - * with a write-alloc cache setup, typically used when the write result of the - * GPU isn't needed, but the GPU must write anyway. 
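/*
 * Illustrative sketch (hypothetical helper, not defined in this header):
 * describing a BASE_MEM_IMPORT_TYPE_USER_BUFFER import using the struct
 * documented above. The pointer and length come from the caller; only the
 * struct, the enum and the usual kernel integer types are assumed.
 */
static inline struct base_mem_import_user_buffer
make_user_buffer_handle(unsigned long ptr, u64 length_bytes)
{
	struct base_mem_import_user_buffer handle = {
		.ptr = (u64)ptr,	/* CPU address of the user buffer */
		.length = length_bytes,	/* buffer length in bytes */
	};
	return handle;
}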
- * - * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. - */ -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) - -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) -#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) -#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) -#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) -/* reserved handles ..-48< for future special handles */ -#define BASE_MEM_COOKIE_BASE (64ul << 12) -#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ - BASE_MEM_COOKIE_BASE) - -/* Mask to detect 4GB boundary alignment */ -#define BASE_MEM_MASK_4GB 0xfffff000UL -/* Mask to detect 4GB boundary (in page units) alignment */ -#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) - -/** - * Limit on the 'extent' parameter for an allocation with the - * BASE_MEM_TILER_ALIGN_TOP flag set - * - * This is the same as the maximum limit for a Buffer Descriptor's chunk size - */ -#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \ - (21u - (LOCAL_PAGE_SHIFT)) -#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \ - (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2)) - -/* Bit mask of cookies used for for memory allocation setup */ -#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ - -/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ -#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ - - -/** - * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs - * @{ - */ - -/** - * @brief a basic memory operation (sync-set). - * - * The content of this structure is private, and should only be used - * by the accessors. - */ -typedef struct base_syncset { - struct basep_syncset basep_sset; -} base_syncset; - -/** @} end group base_user_api_memory_defered */ - -/** - * Handle to represent imported memory object. - * Simple opague handle to imported memory, can't be used - * with anything but base_external_resource_init to bind to an atom. - */ -typedef struct base_import_handle { - struct { - u64 handle; - } basep; -} base_import_handle; - -/** @} end group base_user_api_memory */ - -/** - * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs - * @{ - */ - -typedef int platform_fence_type; -#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1) - -/** - * Base stream handle. - * - * References an underlying base stream object. - */ -typedef struct base_stream { - struct { - int fd; - } basep; -} base_stream; - -/** - * Base fence handle. - * - * References an underlying base fence object. - */ -typedef struct base_fence { - struct { - int fd; - int stream_fd; - } basep; -} base_fence; - -/** - * @brief Per-job data - * - * This structure is used to store per-job data, and is completely unused - * by the Base driver. It can be used to store things such as callback - * function pointer, data to handle job completion. It is guaranteed to be - * untouched by the Base driver. - */ -typedef struct base_jd_udata { - u64 blob[2]; /**< per-job data array */ -} base_jd_udata; - -/** - * @brief Memory aliasing info - * - * Describes a memory handle to be aliased. - * A subset of the handle can be chosen for aliasing, given an offset and a - * length. 
- * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a - * region where a special page is mapped with a write-alloc cache setup, - * typically used when the write result of the GPU isn't needed, but the GPU - * must write anyway. - * - * Offset and length are specified in pages. - * Offset must be within the size of the handle. - * Offset+length must not overrun the size of the handle. - * - * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * @offset Offset within the handle to start aliasing from, in pages. - * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. - * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE - * specifies the number of times the special page is needed. - */ -struct base_mem_aliasing_info { - base_mem_handle handle; - u64 offset; - u64 length; -}; - -/** - * Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the - * initial commit is aligned to 'extent' pages, where 'extent' must be a power - * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES - */ -#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) - -/** - * struct base_jit_alloc_info - Structure which describes a JIT allocation - * request. - * @gpu_alloc_addr: The GPU virtual address to write the JIT - * allocated GPU virtual address to. - * @va_pages: The minimum number of virtual pages required. - * @commit_pages: The minimum number of physical pages which - * should back the allocation. - * @extent: Granularity of physical pages to grow the - * allocation by during a fault. - * @id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - * Zero is not a valid value. - * @bin_id: The JIT allocation bin, used in conjunction with - * @max_allocations to limit the number of each - * type of JIT allocation. - * @max_allocations: The maximum number of allocations allowed within - * the bin specified by @bin_id. Should be the same - * for all JIT allocations within the same bin. - * @flags: flags specifying the special requirements for - * the JIT allocation. - * @padding: Expansion space - should be initialised to zero - * @usage_id: A hint about which allocation should be reused. - * The kernel should attempt to use a previous - * allocation with the same usage_id - */ -struct base_jit_alloc_info { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extent; - u8 id; - u8 bin_id; - u8 max_allocations; - u8 flags; - u8 padding[2]; - u16 usage_id; -}; - -/** - * @brief Job dependency type. - * - * A flags field will be inserted into the atom structure to specify whether a dependency is a data or - * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without - * changing the structure size). - * When the flag is set for a particular dependency to signal that it is an ordering only dependency then - * errors will not be propagated. - */ -typedef u8 base_jd_dep_type; - - -#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ -#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ -#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ - -/** - * @brief Job chain hardware requirements. - * - * A job chain must specify what GPU features it needs to allow the - * driver to schedule the job correctly. By not specifying the - * correct settings can/will cause an early job termination. Multiple - * values can be ORed together to specify multiple requirements. 
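/*
 * Illustrative sketch (hypothetical helper, not defined in this header):
 * filling a single JIT allocation request according to the base_jit_alloc_info
 * field descriptions above. The page counts are arbitrary example values;
 * id must be non-zero and stay unique until the matching free request.
 */
static inline void fill_jit_request(struct base_jit_alloc_info *info,
				    u64 result_gpu_va)
{
	*info = (struct base_jit_alloc_info){
		.gpu_alloc_addr = result_gpu_va, /* where the GPU VA is written back */
		.va_pages = 256,		 /* minimum virtual range, in pages */
		.commit_pages = 16,		 /* minimum physical backing, in pages */
		.extent = 16,			 /* growth granularity on GPU fault */
		.id = 1,			 /* caller-chosen, non-zero */
	};
}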
- * Special case is ::BASE_JD_REQ_DEP, which is used to express complex - * dependencies, and that doesn't execute anything on the hardware. - */ -typedef u32 base_jd_core_req; - -/* Requirements that come from the HW */ - -/** - * No requirement, dependency only - */ -#define BASE_JD_REQ_DEP ((base_jd_core_req)0) - -/** - * Requires fragment shaders - */ -#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) - -/** - * Requires compute shaders - * This covers any of the following Midgard Job types: - * - Vertex Shader Job - * - Geometry Shader Job - * - An actual Compute Shader Job - * - * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the - * job is specifically just the "Compute Shader" job type, and not the "Vertex - * Shader" nor the "Geometry Shader" job type. - */ -#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) -#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ -#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ -#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ - -/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ - -/* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) - -/** - * SW-only requirement: coalesce completion events. - * If this bit is set then completion of this atom will not cause an event to - * be sent to userspace, whether successful or not; completion events will be - * deferred until an atom completes which does not have this bit set. - * - * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. - */ -#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) - -/** - * SW Only requirement: the job chain requires a coherent core group. We don't - * mind which coherent core group is used. - */ -#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) - -/** - * SW Only requirement: The performance counters should be enabled only when - * they are needed, to reduce power consumption. - */ - -#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) - -/** - * SW Only requirement: External resources are referenced by this atom. - * When external resources are referenced no syncsets can be bundled with the atom - * but should instead be part of a NULL jobs inserted into the dependency tree. - * The first pre_dep object must be configured for the external resouces to use, - * the second pre_dep object can be used to create other dependencies. - * - * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and - * BASE_JD_REQ_SOFT_EVENT_WAIT. - */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) - -/** - * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted - * to the hardware but will cause some action to happen within the driver - */ -#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) - -#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) -#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) -#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) - -/** - * SW Only requirement : Replay job. - * - * If the preceding job fails, the replay job will cause the jobs specified in - * the list of base_jd_replay_payload pointed to by the jc pointer to be - * replayed. - * - * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT - * times. 
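/*
 * Illustrative sketch (the variable is hypothetical, not from this header):
 * requirement bits for a fragment job chain that also references external
 * resources, ORed together as the base_jd_core_req documentation above
 * describes.
 */
static const base_jd_core_req frag_extres_req =
	BASE_JD_REQ_FS |		/* runs on the fragment job slot */
	BASE_JD_REQ_EXTERNAL_RESOURCES;	/* atom carries external resources */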
If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay - * job is failed, as well as any following dependencies. - * - * The replayed jobs will require a number of atom IDs. If there are not enough - * free atom IDs then the replay job will fail. - * - * If the preceding job does not fail, then the replay job is returned as - * completed. - * - * The replayed jobs will never be returned to userspace. The preceding failed - * job will be returned to userspace as failed; the status of this job should - * be ignored. Completion should be determined by the status of the replay soft - * job. - * - * In order for the jobs to be replayed, the job headers will have to be - * modified. The Status field will be reset to NOT_STARTED. If the Job Type - * field indicates a Vertex Shader Job then it will be changed to Null Job. - * - * The replayed jobs have the following assumptions : - * - * - No external resources. Any required external resources will be held by the - * replay atom. - * - Pre-dependencies are created based on job order. - * - Atom numbers are automatically assigned. - * - device_nr is set to 0. This is not relevant as - * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. - * - Priority is inherited from the replay job. - */ -#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) -/** - * SW only requirement: event wait/trigger job. - * - * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. - * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the - * other waiting jobs. It completes immediately. - * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it - * possible for other jobs to wait upon. It completes immediately. - */ -#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) - -#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) - -/** - * SW only requirement: Just In Time allocation - * - * This job requests a single or multiple JIT allocations through a list - * of @base_jit_alloc_info structure which is passed via the jc element of - * the atom. The number of @base_jit_alloc_info structures present in the - * list is passed via the nr_extres element of the atom - * - * It should be noted that the id entry in @base_jit_alloc_info must not - * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. - * - * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE - * soft job to free the JIT allocation is still made. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) -/** - * SW only requirement: Just In Time free - * - * This job requests a single or multiple JIT allocations created by - * @BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the JIT - * allocations is passed via the jc element of the atom. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) - -/** - * SW only requirement: Map external resource - * - * This job requests external resource(s) are mapped once the dependencies - * of the job have been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. 
- */ -#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) -/** - * SW only requirement: Unmap external resource - * - * This job requests external resource(s) are unmapped once the dependencies - * of the job has been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) - -/** - * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) - * - * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type. - * - * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job - * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. - */ -#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) - -/** - * HW Requirement: Use the base_jd_atom::device_nr field to specify a - * particular core group - * - * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority - * - * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. - * - * If the core availability policy is keeping the required core group turned off, then - * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. - */ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) - -/** - * SW Flag: If this bit is set then the successful completion of this atom - * will not cause an event to be sent to userspace - */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) - -/** - * SW Flag: If this bit is set then completion of this atom will not cause an - * event to be sent to userspace, whether successful or not. - */ -#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) - -/** - * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job starts which does not have this bit set or a job completes - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if - * the CPU may have written to memory addressed by the job since the last job - * without this bit set was submitted. - */ -#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) - -/** - * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job completes which does not have this bit set or a job starts - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if - * the CPU may read from or partially overwrite memory addressed by the job - * before the next job without this bit set completes. - */ -#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) - -/** - * These requirement bits are currently unused in base_jd_core_req - */ -#define BASEP_JD_REQ_RESERVED \ - (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ - BASE_JD_REQ_EVENT_COALESCE | \ - BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ - BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ - BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END)) - -/** - * Mask of all bits in base_jd_core_req that control the type of the atom. 
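As a sketch of how the base_jd_core_req bits above are ORed together, the snippet below combines the fragment-shader, AFBC and cache-skip requirements. Only the flags used here are redefined locally, with the values shown in the removed header; the scenario itself is an assumed example, not the driver's own code.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t base_jd_core_req;

#define BASE_JD_REQ_FS               ((base_jd_core_req)1 << 0)
#define BASE_JD_REQ_FS_AFBC          ((base_jd_core_req)1 << 13)
#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
#define BASE_JD_REQ_SKIP_CACHE_END   ((base_jd_core_req)1 << 16)

int main(void)
{
	/* A fragment job chain producing AFBC output, where the caller also
	 * elects to skip cache maintenance on both sides of the job. */
	base_jd_core_req req = BASE_JD_REQ_FS |
			       BASE_JD_REQ_FS_AFBC |
			       BASE_JD_REQ_SKIP_CACHE_START |
			       BASE_JD_REQ_SKIP_CACHE_END;

	printf("core_req = 0x%08x\n", (unsigned int)req);
	return 0;
}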
- * - * This allows dependency only atoms to have flags set - */ -#define BASE_JD_REQ_ATOM_TYPE \ - (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ - BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) - -/** - * Mask of all bits in base_jd_core_req that control the type of a soft job. - */ -#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) - -/* - * Returns non-zero value if core requirements passed define a soft job or - * a dependency only job. - */ -#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ - ((core_req & BASE_JD_REQ_SOFT_JOB) || \ - (core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) - -/* - * Base Atom priority - * - * Only certain priority levels are actually implemented, as specified by the - * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority - * level that is not one of those defined below. - * - * Priority levels only affect scheduling after the atoms have had dependencies - * resolved. For example, a low priority atom that has had its dependencies - * resolved might run before a higher priority atom that has not had its - * dependencies resolved. - * - * In general, fragment atoms do not affect non-fragment atoms with - * lower priorities, and vice versa. One exception is that there is only one - * priority value for each context. So a high-priority (e.g.) fragment atom - * could increase its context priority, causing its non-fragment atoms to also - * be scheduled sooner. - * - * The atoms are scheduled as follows with respect to their priorities: - * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies - * resolved, and atom 'X' has a higher priority than atom 'Y' - * - If atom 'Y' is currently running on the HW, then it is interrupted to - * allow atom 'X' to run soon after - * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing - * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' - * - Any two atoms that have the same priority could run in any order with - * respect to each other. That is, there is no ordering constraint between - * atoms of the same priority. - * - * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are - * scheduled between contexts. The default value, 0, will cause higher-priority - * atoms to be scheduled first, regardless of their context. The value 1 will - * use a round-robin algorithm when deciding which context's atoms to schedule - * next, so higher-priority atoms can only preempt lower priority atoms within - * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and - * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. - */ -typedef u8 base_jd_prio; - -/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) -/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and - * BASE_JD_PRIO_LOW */ -#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) -/* Low atom priority. */ -#define BASE_JD_PRIO_LOW ((base_jd_prio)2) - -/* Count of the number of priority levels. 
This itself is not a valid - * base_jd_prio setting */ -#define BASE_JD_NR_PRIO_LEVELS 3 - -enum kbase_jd_atom_state { - /** Atom is not used */ - KBASE_JD_ATOM_STATE_UNUSED, - /** Atom is queued in JD */ - KBASE_JD_ATOM_STATE_QUEUED, - /** Atom has been given to JS (is runnable/running) */ - KBASE_JD_ATOM_STATE_IN_JS, - /** Atom has been completed, but not yet handed back to job dispatcher - * for dependency resolution */ - KBASE_JD_ATOM_STATE_HW_COMPLETED, - /** Atom has been completed, but not yet handed back to userspace */ - KBASE_JD_ATOM_STATE_COMPLETED -}; - -typedef u8 base_atom_id; /**< Type big enough to store an atom number in */ - -struct base_dependency { - base_atom_id atom_id; /**< An atom number */ - base_jd_dep_type dependency_type; /**< Dependency type */ -}; - -/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value. - * In order to keep the size of the structure same, padding field has been adjusted - * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) - * is added at the end of the structure. Place in the structure previously occupied by u16 core_req - * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission - * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left - * for possible future use. */ -typedef struct base_jd_atom_v2 { - u64 jc; /**< job-chain GPU address */ - struct base_jd_udata udata; /**< user data */ - u64 extres_list; /**< list of external resources */ - u16 nr_extres; /**< nr of external resources or JIT allocations */ - u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ - struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, - this is done in order to reduce possibility of improper assigment of a dependency field */ - base_atom_id atom_number; /**< unique number to identify the atom */ - base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ - u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ - u8 padding[1]; - base_jd_core_req core_req; /**< core requirements */ -} base_jd_atom_v2; - -typedef enum base_external_resource_access { - BASE_EXT_RES_ACCESS_SHARED, - BASE_EXT_RES_ACCESS_EXCLUSIVE -} base_external_resource_access; - -typedef struct base_external_resource { - u64 ext_resource; -} base_external_resource; - - -/** - * The maximum number of external resources which can be mapped/unmapped - * in a single request. - */ -#define BASE_EXT_RES_COUNT_MAX 10 - -/** - * struct base_external_resource_list - Structure which describes a list of - * external resources. - * @count: The number of resources. - * @ext_res: Array of external resources which is - * sized at allocation time. - */ -struct base_external_resource_list { - u64 count; - struct base_external_resource ext_res[1]; -}; - -struct base_jd_debug_copy_buffer { - u64 address; - u64 size; - struct base_external_resource extres; -}; - -/** - * @brief Setter for a dependency structure - * - * @param[in] dep The kbase jd atom dependency to be initialized. - * @param id The atom_id to be assigned. - * @param dep_type The dep_type to be assigned. 
- * - */ -static inline void base_jd_atom_dep_set(struct base_dependency *dep, - base_atom_id id, base_jd_dep_type dep_type) -{ - LOCAL_ASSERT(dep != NULL); - - /* - * make sure we don't set not allowed combinations - * of atom_id/dependency_type. - */ - LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) || - (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID)); - - dep->atom_id = id; - dep->dependency_type = dep_type; -} - -/** - * @brief Make a copy of a dependency structure - * - * @param[in,out] dep The kbase jd atom dependency to be written. - * @param[in] from The dependency to make a copy from. - * - */ -static inline void base_jd_atom_dep_copy(struct base_dependency *dep, - const struct base_dependency *from) -{ - LOCAL_ASSERT(dep != NULL); - - base_jd_atom_dep_set(dep, from->atom_id, from->dependency_type); -} - -/** - * @brief Soft-atom fence trigger setup. - * - * Sets up an atom to be a SW-only atom signaling a fence - * when it reaches the run state. - * - * Using the existing base dependency system the fence can - * be set to trigger when a GPU job has finished. - * - * The base fence object must not be terminated until the atom - * has been submitted to @ref base_jd_submit and @ref base_jd_submit - * has returned. - * - * @a fence must be a valid fence set up with @a base_fence_init. - * Calling this function with a uninitialized fence results in undefined behavior. - * - * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom - * @param[in] fence The base fence object to trigger. - * - * @pre @p fence must reference a @ref base_fence successfully initialized by - * calling @ref base_fence_init. - * @pre @p fence was @e not initialized by calling @ref base_fence_import, nor - * is it associated with a fence-trigger job that was already submitted - * by calling @ref base_jd_submit. - * @post @p atom can be submitted by calling @ref base_jd_submit. - */ -static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -{ - LOCAL_ASSERT(atom); - LOCAL_ASSERT(fence); - LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE); - LOCAL_ASSERT(fence->basep.stream_fd >= 0); - atom->jc = (uintptr_t) fence; - atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER; -} - -/** - * @brief Soft-atom fence wait setup. - * - * Sets up an atom to be a SW-only atom waiting on a fence. - * When the fence becomes triggered the atom becomes runnable - * and completes immediately. - * - * Using the existing base dependency system the fence can - * be set to block a GPU job until it has been triggered. - * - * The base fence object must not be terminated until the atom - * has been submitted to @ref base_jd_submit and - * @ref base_jd_submit has returned. - * - * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom - * @param[in] fence The base fence object to wait on - * - * @pre @p fence must reference a @ref base_fence successfully initialized by - * calling @ref base_fence_import, or it must be associated with a - * fence-trigger job that was already submitted by calling - * @ref base_jd_submit. - * @post @p atom can be submitted by calling @ref base_jd_submit. - */ -static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence) -{ - LOCAL_ASSERT(atom); - LOCAL_ASSERT(fence); - LOCAL_ASSERT(fence->basep.fd >= 0); - atom->jc = (uintptr_t) fence; - atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT; -} - -/** - * @brief External resource info initialization. 
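A compact sketch of the base_jd_atom_dep_set() pattern shown above, expressing an ordering-only dependency between two atoms. Types are reduced to the minimum needed, assert() stands in for LOCAL_ASSERT(), and the atom numbers are illustrative assumptions.

#include <assert.h>
#include <stdint.h>

typedef uint8_t base_atom_id;
typedef uint8_t base_jd_dep_type;

#define BASE_JD_DEP_TYPE_INVALID (0)
#define BASE_JD_DEP_TYPE_ORDER   (1U << 1)

struct base_dependency {
	base_atom_id atom_id;
	base_jd_dep_type dependency_type;
};

static inline void base_jd_atom_dep_set(struct base_dependency *dep,
		base_atom_id id, base_jd_dep_type dep_type)
{
	/* atom id 0 is only legal together with the INVALID dependency type */
	assert((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
	       (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
	dep->atom_id = id;
	dep->dependency_type = dep_type;
}

int main(void)
{
	struct base_dependency pre_dep[2];

	/* This atom must run after atom 1; errors are not propagated. */
	base_jd_atom_dep_set(&pre_dep[0], 1, BASE_JD_DEP_TYPE_ORDER);
	/* Unused slot: atom id 0 with the INVALID type. */
	base_jd_atom_dep_set(&pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
	return 0;
}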
- * - * Sets up an external resource object to reference - * a memory allocation and the type of access requested. - * - * @param[in] res The resource object to initialize - * @param handle The handle to the imported memory object, must be - * obtained by calling @ref base_mem_as_import_handle(). - * @param access The type of access requested - */ -static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access) -{ - u64 address; - - address = handle.basep.handle; - - LOCAL_ASSERT(res != NULL); - LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB)); - LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE); - - res->ext_resource = address | (access & LOCAL_PAGE_LSB); -} - -/** - * @brief Job chain event code bits - * Defines the bits used to create ::base_jd_event_code - */ -enum { - BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */ - BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */ - BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */ - BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */ - BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */ - BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */ - BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */ - BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */ -}; - -/** - * @brief Job chain event codes - * - * HW and low-level SW events are represented by event codes. - * The status of jobs which succeeded are also represented by - * an event code (see ::BASE_JD_EVENT_DONE). - * Events are usually reported as part of a ::base_jd_event. - * - * The event codes are encoded in the following way: - * @li 10:0 - subtype - * @li 12:11 - type - * @li 13 - SW success (only valid if the SW bit is set) - * @li 14 - SW event (HW event if not set) - * @li 15 - Kernel event (should never be seen in userspace) - * - * Events are split up into ranges as follows: - * - BASE_JD_EVENT_RANGE_\_START - * - BASE_JD_EVENT_RANGE_\_END - * - * \a code is in \'s range when: - * - BASE_JD_EVENT_RANGE_\_START <= code < BASE_JD_EVENT_RANGE_\_END - * - * Ranges can be asserted for adjacency by testing that the END of the previous - * is equal to the START of the next. This is useful for optimizing some tests - * for range. - * - * A limitation is that the last member of this enum must explicitly be handled - * (with an assert-unreachable statement) in switch statements that use - * variables of this type. Otherwise, the compiler warns that we have not - * handled that enum value. 
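The bit layout above can be decoded with BASE_JD_SW_EVENT_TYPE_MASK; a small sketch follows. The example code value corresponds to BASE_JD_EVENT_BAG_DONE from the event-code enum that follows this comment, and the surrounding main() is an illustrative assumption.

#include <stdio.h>

enum {
	BASE_JD_SW_EVENT_KERNEL    = (1u << 15),
	BASE_JD_SW_EVENT           = (1u << 14),
	BASE_JD_SW_EVENT_SUCCESS   = (1u << 13),
	BASE_JD_SW_EVENT_JOB       = (0u << 11),
	BASE_JD_SW_EVENT_BAG       = (1u << 11),
	BASE_JD_SW_EVENT_INFO      = (2u << 11),
	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
};

int main(void)
{
	/* Value of BASE_JD_EVENT_BAG_DONE, built from the bits above. */
	unsigned int code = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
			    BASE_JD_SW_EVENT_BAG | 0x000;

	if ((code & BASE_JD_SW_EVENT) &&
	    (code & BASE_JD_SW_EVENT_TYPE_MASK) == BASE_JD_SW_EVENT_BAG)
		printf("software bag event, success=%d\n",
		       !!(code & BASE_JD_SW_EVENT_SUCCESS));
	return 0;
}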
- */ -typedef enum base_jd_event_code { - /* HW defined exceptions */ - - /** Start of HW Non-fault status codes - * - * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, - * because the job was hard-stopped - */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, - - /* non-fatal exceptions */ - BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ - BASE_JD_EVENT_DONE = 0x01, - BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ - BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ - BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ - - /** End of HW Non-fault status codes - * - * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, - * because the job was hard-stopped - */ - BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, - - /** Start of HW fault and SW Error status codes */ - BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, - - /* job exceptions */ - BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, - BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, - BASE_JD_EVENT_JOB_READ_FAULT = 0x42, - BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, - BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, - BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, - BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, - BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, - BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, - BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, - BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, - BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, - BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, - BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, - BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, - BASE_JD_EVENT_STATE_FAULT = 0x5A, - BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, - BASE_JD_EVENT_UNKNOWN = 0x7F, - - /* GPU exceptions */ - BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, - BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, - - /* MMU exceptions */ - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, - BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, - BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, - BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, - BASE_JD_EVENT_ACCESS_FLAG = 0xD8, - - /* SW defined exceptions */ - BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, - BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, - BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, - BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, - BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005, - - BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, - - /** End of HW fault and SW Error status codes */ - BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - /** Start of SW Success status codes */ - BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, - - BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, - BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, - 
BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, - - /** End of SW Success status codes */ - BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, - - /** Start of Kernel-only status codes. Such codes are never returned to user-space */ - BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000, - BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, - - /** End of Kernel-only status codes. */ - BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF -} base_jd_event_code; - -/** - * @brief Event reporting structure - * - * This structure is used by the kernel driver to report information - * about GPU events. The can either be HW-specific events or low-level - * SW events, such as job-chain completion. - * - * The event code contains an event type field which can be extracted - * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK. - * - * Based on the event type base_jd_event::data holds: - * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed - * job-chain - * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has - * been completed (ie all contained job-chains have been completed). - * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used - */ -typedef struct base_jd_event_v2 { - base_jd_event_code event_code; /**< event code */ - base_atom_id atom_number; /**< the atom number that has completed */ - struct base_jd_udata udata; /**< user data */ -} base_jd_event_v2; - -/** - * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs. - * - * This structure is stored into the memory pointed to by the @c jc field - * of @ref base_jd_atom. - * - * It must not occupy the same CPU cache line(s) as any neighboring data. - * This is to avoid cases where access to pages containing the structure - * is shared between cached and un-cached memory regions, which would - * cause memory corruption. - */ - -typedef struct base_dump_cpu_gpu_counters { - u64 system_time; - u64 cycle_counter; - u64 sec; - u32 usec; - u8 padding[36]; -} base_dump_cpu_gpu_counters; - -/** @} end group base_user_api_job_dispatch */ - -#define GPU_MAX_JOB_SLOTS 16 - -/** - * @page page_base_user_api_gpuprops User-side Base GPU Property Query API - * - * The User-side Base GPU Property Query API encapsulates two - * sub-modules: - * - * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties" - * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties" - * - * There is a related third module outside of Base, which is owned by the MIDG - * module: - * - @ref gpu_props_static "Midgard Compile-time GPU Properties" - * - * Base only deals with properties that vary between different Midgard - * implementations - the Dynamic GPU properties and the Platform Config - * properties. - * - * For properties that are constant for the Midgard Architecture, refer to the - * MIDG module. However, we will discuss their relevance here just to - * provide background information. - * - * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules - * - * The compile-time properties (Platform Config, Midgard Compile-time - * properties) are exposed as pre-processor macros. 
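One illustrative way to apply the range rule described above (START <= code < END) when classifying a completion code from base_jd_event_v2 is sketched below. The constants are the numeric values of the range markers spelled out in the removed enum; treating BASE_JD_EVENT_DONE plus the SW-success range as "success" is an assumption of this sketch, not a statement of the driver's policy.

#include <stdbool.h>
#include <stdio.h>

#define BASE_JD_EVENT_DONE                    0x01
#define BASE_JD_EVENT_RANGE_SW_SUCCESS_START  0x6000 /* SW_EVENT | SUCCESS                   */
#define BASE_JD_EVENT_RANGE_SW_SUCCESS_END    0x7BFF /* SW_EVENT | SUCCESS | RESERVED | 0x3FF */

static bool event_is_success(unsigned int code)
{
	if (code == BASE_JD_EVENT_DONE)
		return true;
	return code >= BASE_JD_EVENT_RANGE_SW_SUCCESS_START &&
	       code <  BASE_JD_EVENT_RANGE_SW_SUCCESS_END;
}

int main(void)
{
	printf("DONE:        %d\n", event_is_success(BASE_JD_EVENT_DONE));
	printf("STATE_FAULT: %d\n", event_is_success(0x5A));
	return 0;
}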
- * - * Complementing the compile-time properties are the Dynamic GPU - * Properties, which act as a conduit for the Midgard Configuration - * Discovery. - * - * In general, the dynamic properties are present to verify that the platform - * has been configured correctly with the right set of Platform Config - * Compile-time Properties. - * - * As a consistent guide across the entire DDK, the choice for dynamic or - * compile-time should consider the following, in order: - * -# Can the code be written so that it doesn't need to know the - * implementation limits at all? - * -# If you need the limits, get the information from the Dynamic Property - * lookup. This should be done once as you fetch the context, and then cached - * as part of the context data structure, so it's cheap to access. - * -# If there's a clear and arguable inefficiency in using Dynamic Properties, - * then use a Compile-Time Property (Platform Config, or Midgard Compile-time - * property). Examples of where this might be sensible follow: - * - Part of a critical inner-loop - * - Frequent re-use throughout the driver, causing significant extra load - * instructions or control flow that would be worthwhile optimizing out. - * - * We cannot provide an exhaustive set of examples, neither can we provide a - * rule for every possible situation. Use common sense, and think about: what - * the rest of the driver will be doing; how the compiler might represent the - * value if it is a compile-time constant; whether an OEM shipping multiple - * devices would benefit much more from a single DDK binary, instead of - * insignificant micro-optimizations. - * - * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties - * - * Dynamic GPU properties are presented in two sets: - * -# the commonly used properties in @ref base_gpu_props, which have been - * unpacked from GPU register bitfields. - * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props - * (also a member of @ref base_gpu_props). All of these are presented in - * the packed form, as presented by the GPU registers themselves. - * - * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to - * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device - * behaving differently?". In this case, all information about the - * configuration is potentially useful, but it does not need to be processed - * by the driver. Instead, the raw registers can be processed by the Mali - * Tools software on the host PC. - * - * The properties returned extend the Midgard Configuration Discovery - * registers. For example, GPU clock speed is not specified in the Midgard - * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. - * - * The GPU properties are obtained by a call to - * base_get_gpu_props(). This simply returns a pointer to a const - * base_gpu_props structure. It is constant for the life of a base - * context. Multiple calls to base_get_gpu_props() to a base context - * return the same pointer to a constant structure. This avoids cache pollution - * of the common data. - * - * This pointer must not be freed, because it does not point to the start of a - * region allocated by the memory allocator; instead, just close the @ref - * base_context. 
- * - * - * @section sec_base_user_api_gpuprops_kernel Kernel Operation - * - * During Base Context Create time, user-side makes a single kernel call: - * - A call to fill user memory with GPU information structures - * - * The kernel-side will fill the provided the entire processed @ref base_gpu_props - * structure, because this information is required in both - * user and kernel side; it does not make sense to decode it twice. - * - * Coherency groups must be derived from the bitmasks, but this can be done - * kernel side, and just once at kernel startup: Coherency groups must already - * be known kernel-side, to support chains that specify a 'Only Coherent Group' - * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. - * - * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation - * Creation of the coherent group data is done at device-driver startup, and so - * is one-time. This will most likely involve a loop with CLZ, shifting, and - * bit clearing on the L2_PRESENT mask, depending on whether the - * system is L2 Coherent. The number of shader cores is done by a - * population count, since faulty cores may be disabled during production, - * producing a non-contiguous mask. - * - * The memory requirements for this algorithm can be determined either by a u64 - * population count on the L2_PRESENT mask (a LUT helper already is - * required for the above), or simple assumption that there can be no more than - * 16 coherent groups, since core groups are typically 4 cores. - */ - -/** - * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs - * @{ - */ - -/** - * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties - * @{ - */ - -#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 - -#define BASE_MAX_COHERENT_GROUPS 16 - -struct mali_base_gpu_core_props { - /** - * Product specific value. - */ - u32 product_id; - - /** - * Status of the GPU release. - * No defined values, but starts at 0 and increases by one for each - * release status (alpha, beta, EAC, etc.). - * 4 bit values (0-15). - */ - u16 version_status; - - /** - * Minor release number of the GPU. "P" part of an "RnPn" release number. - * 8 bit values (0-255). - */ - u16 minor_revision; - - /** - * Major release number of the GPU. "R" part of an "RnPn" release number. - * 4 bit values (0-15). - */ - u16 major_revision; - - u16 padding; - - /* The maximum GPU frequency. Reported to applications by - * clGetDeviceInfo() - */ - u32 gpu_freq_khz_max; - - /** - * Size of the shader program counter, in bits. - */ - u32 log2_program_counter_size; - - /** - * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a - * bitpattern where a set bit indicates that the format is supported. - * - * Before using a texture format, it is recommended that the corresponding - * bit be checked. - */ - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - - /** - * Theoretical maximum memory available to the GPU. It is unlikely that a - * client will be able to allocate all of this memory for their own - * purposes, but this at least provides an upper bound on the memory - * available to the GPU. - * - * This is required for OpenCL's clGetDeviceInfo() call when - * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The - * client will not be expecting to allocate anywhere near this value. - */ - u64 gpu_available_memory_size; - - /** - * The number of execution engines. 
- */ - u8 num_exec_engines; -}; - -/** - * - * More information is possible - but associativity and bus width are not - * required by upper-level apis. - */ -struct mali_base_gpu_l2_cache_props { - u8 log2_line_size; - u8 log2_cache_size; - u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ - u8 padding[5]; -}; - -struct mali_base_gpu_tiler_props { - u32 bin_size_bytes; /* Max is 4*2^15 */ - u32 max_active_levels; /* Max is 2^15 */ -}; - -/** - * GPU threading system details. - */ -struct mali_base_gpu_thread_props { - u32 max_threads; /* Max. number of threads per core */ - u32 max_workgroup_size; /* Max. number of threads per workgroup */ - u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */ - u16 max_registers; /* Total size [1..65535] of the register file available per core. */ - u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ - u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */ - u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ - u8 padding[3]; - u32 tls_alloc; /* Number of threads per core that TLS must - * be allocated for - */ -}; - -/** - * @brief descriptor for a coherent group - * - * \c core_mask exposes all cores in that coherent group, and \c num_cores - * provides a cached population-count for that mask. - * - * @note Whilst all cores are exposed in the mask, not all may be available to - * the application, depending on the Kernel Power policy. - * - * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage. - */ -struct mali_base_gpu_coherent_group { - u64 core_mask; /**< Core restriction mask required for the group */ - u16 num_cores; /**< Number of cores in the group */ - u16 padding[3]; -}; - -/** - * @brief Coherency group information - * - * Note that the sizes of the members could be reduced. However, the \c group - * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte - * aligned, thus leading to wastage if the other members sizes were reduced. - * - * The groups are sorted by core mask. The core masks are non-repeating and do - * not intersect. - */ -struct mali_base_gpu_coherent_group_info { - u32 num_groups; - - /** - * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches. - * - * The GPU Counter dumping writes 2048 bytes per core group, regardless of - * whether the core groups are coherent or not. Hence this member is needed - * to calculate how much memory is required for dumping. - * - * @note Do not use it to work out how many valid elements are in the - * group[] member. Use num_groups instead. - */ - u32 num_core_groups; - - /** - * Coherency features of the memory, accessed by @ref gpu_mem_features - * methods - */ - u32 coherency; - - u32 padding; - - /** - * Descriptors of coherent groups - */ - struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; -}; - -/** - * A complete description of the GPU's Hardware Configuration Discovery - * registers. - * - * The information is presented inefficiently for access. For frequent access, - * the values should be better expressed in an unpacked form in the - * base_gpu_props structure. - * - * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to - * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device - * behaving differently?". 
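As the coherency-group text above notes, num_cores is simply a cached population count of core_mask, and the mask may be non-contiguous when cores are fused off. A minimal sketch, using an assumed stand-in struct rather than mali_base_gpu_coherent_group itself:

#include <stdint.h>
#include <stdio.h>

struct coherent_group_example {
	uint64_t core_mask;   /* mirrors mali_base_gpu_coherent_group.core_mask */
	uint16_t num_cores;   /* cached population count of core_mask           */
};

int main(void)
{
	/* Eight-core group with core 3 disabled in production: 0b11110111. */
	struct coherent_group_example grp = { .core_mask = 0xF7 };

	grp.num_cores = (uint16_t)__builtin_popcountll(grp.core_mask);
	printf("cores in group: %u\n", grp.num_cores);  /* prints 7 */
	return 0;
}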
In this case, all information about the - * configuration is potentially useful, but it does not need to be processed - * by the driver. Instead, the raw registers can be processed by the Mali - * Tools software on the host PC. - * - */ -struct gpu_raw_gpu_props { - u64 shader_present; - u64 tiler_present; - u64 l2_present; - u64 stack_present; - - u32 l2_features; - u32 core_features; - u32 mem_features; - u32 mmu_features; - - u32 as_present; - - u32 js_present; - u32 js_features[GPU_MAX_JOB_SLOTS]; - u32 tiler_features; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - - u32 gpu_id; - - u32 thread_max_threads; - u32 thread_max_workgroup_size; - u32 thread_max_barrier_size; - u32 thread_features; - - /* - * Note: This is the _selected_ coherency mode rather than the - * available modes as exposed in the coherency_features register. - */ - u32 coherency_mode; - - u32 thread_tls_alloc; -}; - -/** - * Return structure for base_get_gpu_props(). - * - * NOTE: the raw_props member in this data structure contains the register - * values from which the value of the other members are derived. The derived - * members exist to allow for efficient access and/or shielding the details - * of the layout of the registers. - * - */ -typedef struct base_gpu_props { - struct mali_base_gpu_core_props core_props; - struct mali_base_gpu_l2_cache_props l2_props; - u64 unused_1; /* keep for backwards compatibility */ - struct mali_base_gpu_tiler_props tiler_props; - struct mali_base_gpu_thread_props thread_props; - - /** This member is large, likely to be 128 bytes */ - struct gpu_raw_gpu_props raw_props; - - /** This must be last member of the structure */ - struct mali_base_gpu_coherent_group_info coherency_info; -} base_gpu_props; - -/** @} end group base_user_api_gpuprops_dyn */ - -/** @} end group base_user_api_gpuprops */ - -/** - * @addtogroup base_user_api_core User-side Base core APIs - * @{ - */ - -/** - * Flags to pass to ::base_context_init. - * Flags can be ORed together to enable multiple things. - * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must - * not collide with them. - */ -typedef u32 base_context_create_flags; - -/** No flags set */ -#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) - -/** Base context is embedded in a cctx object (flag used for CINSTR - * software counter macros) - */ -#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) - -/** Base context is a 'System Monitor' context for Hardware counters. - * - * One important side effect of this is that job submission is disabled. - */ -#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ - ((base_context_create_flags)1 << 1) - - -/** - * Bitpattern describing the ::base_context_create_flags that can be - * passed to base_context_init() - */ -#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ - (BASE_CONTEXT_CCTX_EMBEDDED | \ - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) - -/** - * Bitpattern describing the ::base_context_create_flags that can be - * passed to the kernel - */ -#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED - -/* - * Private flags used on the base context - * - * These start at bit 31, and run down to zero. - * - * They share the same space as @ref base_context_create_flags, and so must - * not collide with them. 
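A sketch of validating user-supplied context creation flags against BASE_CONTEXT_CREATE_ALLOWED_FLAGS as defined above. The values are those shown in the removed header; the helper name check_create_flags and the example inputs (including the high private-flag bit) are assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t base_context_create_flags;

#define BASE_CONTEXT_CCTX_EMBEDDED                  ((base_context_create_flags)1 << 0)
#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED ((base_context_create_flags)1 << 1)
#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
	(BASE_CONTEXT_CCTX_EMBEDDED | BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)

static int check_create_flags(base_context_create_flags flags)
{
	if (flags & ~BASE_CONTEXT_CREATE_ALLOWED_FLAGS)
		return -1;      /* unknown or private flag set */
	return 0;
}

int main(void)
{
	printf("%d\n", check_create_flags(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)); /* 0  */
	printf("%d\n", check_create_flags((base_context_create_flags)1u << 31));         /* -1 */
	return 0;
}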
- */ -/** Private flag tracking whether job descriptor dumping is disabled */ -#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31)) - -/** @} end group base_user_api_core */ - -/** @} end group base_user_api */ - -/** - * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties - * @{ - * - * C Pre-processor macros are exposed here to do with Platform - * Config. - * - * These include: - * - GPU Properties that are constant on a particular Midgard Family - * Implementation e.g. Maximum samples per pixel on Mali-T600. - * - General platform config for the GPU, such as the GPU major and minor - * revison. - */ - -/** @} end group base_plat_config_gpuprops */ - -/** - * @addtogroup base_api Base APIs - * @{ - */ - -/** - * @brief The payload for a replay job. This must be in GPU memory. - */ -typedef struct base_jd_replay_payload { - /** - * Pointer to the first entry in the base_jd_replay_jc list. These - * will be replayed in @b reverse order (so that extra ones can be added - * to the head in future soft jobs without affecting this soft job) - */ - u64 tiler_jc_list; - - /** - * Pointer to the fragment job chain. - */ - u64 fragment_jc; - - /** - * Pointer to the tiler heap free FBD field to be modified. - */ - u64 tiler_heap_free; - - /** - * Hierarchy mask for the replayed fragment jobs. May be zero. - */ - u16 fragment_hierarchy_mask; - - /** - * Hierarchy mask for the replayed tiler jobs. May be zero. - */ - u16 tiler_hierarchy_mask; - - /** - * Default weight to be used for hierarchy levels not in the original - * mask. - */ - u32 hierarchy_default_weight; - - /** - * Core requirements for the tiler job chain - */ - base_jd_core_req tiler_core_req; - - /** - * Core requirements for the fragment job chain - */ - base_jd_core_req fragment_core_req; -} base_jd_replay_payload; - -/** - * @brief An entry in the linked list of job chains to be replayed. This must - * be in GPU memory. - */ -typedef struct base_jd_replay_jc { - /** - * Pointer to next entry in the list. A setting of NULL indicates the - * end of the list. - */ - u64 next; - - /** - * Pointer to the job chain. - */ - u64 jc; - -} base_jd_replay_jc; - -/* Maximum number of jobs allowed in a fragment chain in the payload of a - * replay job */ -#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256 - -/** @} end group base_api */ - -/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, - * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */ -#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) - -/* Indicate that job dumping is enabled. This could affect certain timers - * to account for the performance impact. */ -#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) - -#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ - BASE_TLSTREAM_JOB_DUMPING_ENABLED) - - -#endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_base_mem_priv.h b/drivers/gpu/drm/bifrost/midgard/mali_base_mem_priv.h deleted file mode 100755 index 52c8a4f7d2d8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_base_mem_priv.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#ifndef _BASE_MEM_PRIV_H_ -#define _BASE_MEM_PRIV_H_ - -#define BASE_SYNCSET_OP_MSYNC (1U << 0) -#define BASE_SYNCSET_OP_CSYNC (1U << 1) - -/* - * This structure describe a basic memory coherency operation. - * It can either be: - * @li a sync from CPU to Memory: - * - type = ::BASE_SYNCSET_OP_MSYNC - * - mem_handle = a handle to the memory object on which the operation - * is taking place - * - user_addr = the address of the range to be synced - * - size = the amount of data to be synced, in bytes - * - offset is ignored. - * @li a sync from Memory to CPU: - * - type = ::BASE_SYNCSET_OP_CSYNC - * - mem_handle = a handle to the memory object on which the operation - * is taking place - * - user_addr = the address of the range to be synced - * - size = the amount of data to be synced, in bytes. - * - offset is ignored. - */ -struct basep_syncset { - base_mem_handle mem_handle; - u64 user_addr; - u64 size; - u8 type; - u8 padding[7]; -}; - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase.h deleted file mode 100755 index ca245b44b02f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase.h +++ /dev/null @@ -1,709 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#ifndef _KBASE_H_ -#define _KBASE_H_ - -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) -#include -#endif -#include -#include -#include -#include -#include - -#include "mali_base_kernel.h" -#include - -/* - * Include mali_kbase_defs.h first as this provides types needed by other local - * header files. 
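Referring back to struct basep_syncset from mali_base_mem_priv.h above, a minimal sketch of preparing a CPU-to-memory sync (BASE_SYNCSET_OP_MSYNC). base_mem_handle is reduced to a plain u64 purely for illustration, and prepare_msync is an assumed helper, not part of the removed header.

#include <stdint.h>
#include <string.h>

typedef uint64_t u64;
typedef uint8_t  u8;
typedef u64 base_mem_handle;            /* simplified stand-in */

#define BASE_SYNCSET_OP_MSYNC (1U << 0)
#define BASE_SYNCSET_OP_CSYNC (1U << 1)

struct basep_syncset {
	base_mem_handle mem_handle;
	u64 user_addr;
	u64 size;
	u8  type;
	u8  padding[7];
};

static void prepare_msync(struct basep_syncset *sset, base_mem_handle h,
			  u64 cpu_addr, u64 nbytes)
{
	memset(sset, 0, sizeof(*sset));
	sset->mem_handle = h;
	sset->user_addr  = cpu_addr;    /* start of the range to sync */
	sset->size       = nbytes;      /* amount of data, in bytes   */
	sset->type       = BASE_SYNCSET_OP_MSYNC;
}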
- */ -#include "mali_kbase_defs.h" - -#include "mali_kbase_context.h" -#include "mali_kbase_strings.h" -#include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_js.h" -#include "mali_kbase_utility.h" -#include "mali_kbase_mem.h" -#include "mali_kbase_gpu_memory_debugfs.h" -#include "mali_kbase_mem_profile_debugfs.h" -#include "mali_kbase_debug_job_fault.h" -#include "mali_kbase_jd_debugfs.h" -#include "mali_kbase_gpuprops.h" -#include "mali_kbase_jm.h" -#include "mali_kbase_ioctl.h" - -#include "ipa/mali_kbase_ipa.h" - -#ifdef CONFIG_GPU_TRACEPOINTS -#include -#endif - - -#ifndef u64_to_user_ptr -/* Introduced in Linux v4.6 */ -#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x) -#endif - -/* - * Kernel-side Base (KBase) APIs - */ - -struct kbase_device *kbase_device_alloc(void); -/* -* note: configuration attributes member of kbdev needs to have -* been setup before calling kbase_device_init -*/ - -/* -* API to acquire device list semaphore and return pointer -* to the device list head -*/ -const struct list_head *kbase_dev_list_get(void); -/* API to release the device list semaphore */ -void kbase_dev_list_put(const struct list_head *dev_list); - -int kbase_device_init(struct kbase_device * const kbdev); -void kbase_device_term(struct kbase_device *kbdev); -void kbase_device_free(struct kbase_device *kbdev); -int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); - -/* Needed for gator integration and for reporting vsync information */ -struct kbase_device *kbase_find_device(int minor); -void kbase_release_device(struct kbase_device *kbdev); - - -/** - * kbase_get_unmapped_area() - get an address range which is currently - * unmapped. - * @filp: File operations associated with kbase device. - * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed - * as Mali GPU driver decides about the mapping). - * @len: Length of the address range. - * @pgoff: Page offset within the GPU address space of the kbase context. - * @flags: Flags for the allocation. - * - * Finds the unmapped address range which satisfies requirements specific to - * GPU and those provided by the call parameters. - * - * 1) Requirement for allocations greater than 2MB: - * - alignment offset is set to 2MB and the alignment mask to 2MB decremented - * by 1. - * - * 2) Requirements imposed for the shader memory alignment: - * - alignment is decided by the number of GPU pc bits which can be read from - * GPU properties of the device associated with this kbase context; alignment - * offset is set to this value in bytes and the alignment mask to the offset - * decremented by 1. - * - allocations must not to be at 4GB boundaries. Such cases are indicated - * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase - * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB. - * - * 3) Requirements imposed for tiler memory alignment, cases indicated by - * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase - * region): - * - alignment offset is set to the difference between the kbase region - * extent (converted from the original value in pages to bytes) and the kbase - * region initial_commit (also converted from the original value in pages to - * bytes); alignment mask is set to the kbase region extent in bytes and - * decremented by 1. 
- * - * Return: if successful, address of the unmapped area aligned as required; - * error code (negative) in case of failure; - */ -unsigned long kbase_get_unmapped_area(struct file *filp, - const unsigned long addr, const unsigned long len, - const unsigned long pgoff, const unsigned long flags); - -int kbase_jd_init(struct kbase_context *kctx); -void kbase_jd_exit(struct kbase_context *kctx); - -/** - * kbase_jd_submit - Submit atoms to the job dispatcher - * - * @kctx: The kbase context to submit to - * @user_addr: The address in user space of the struct base_jd_atom_v2 array - * @nr_atoms: The number of atoms in the array - * @stride: sizeof(struct base_jd_atom_v2) - * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) - * - * Return: 0 on success or error code - */ -int kbase_jd_submit(struct kbase_context *kctx, - void __user *user_addr, u32 nr_atoms, u32 stride, - bool uk6_atom); - -/** - * kbase_jd_done_worker - Handle a job completion - * @data: a &struct work_struct - * - * This function requeues the job from the runpool (if it was soft-stopped or - * removed from NEXT registers). - * - * Removes it from the system if it finished/failed/was cancelled. - * - * Resolves dependencies to add dependent jobs to the context, potentially - * starting them if necessary (which may add more references to the context) - * - * Releases the reference to the context from the no-longer-running job. - * - * Handles retrying submission outside of IRQ context if it failed from within - * IRQ context. - */ -void kbase_jd_done_worker(struct work_struct *data); - -void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, - kbasep_js_atom_done_code done_code); -void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -void kbase_jd_zap_context(struct kbase_context *kctx); -bool jd_done_nolock(struct kbase_jd_atom *katom, - struct list_head *completed_jobs_ctx); -void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); -bool jd_submit_atom(struct kbase_context *kctx, - const struct base_jd_atom_v2 *user_atom, - struct kbase_jd_atom *katom); -void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); - -void kbase_job_done(struct kbase_device *kbdev, u32 done); - -/** - * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms - * and soft stop them - * @kctx: Pointer to context to check. - * @katom: Pointer to priority atom. - * - * Atoms from @kctx on the same job slot as @katom, which have lower priority - * than @katom will be soft stopped and put back in the queue, so that atoms - * with higher priority can run. - * - * The hwaccess_lock must be held when calling this function. 
- */ -void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom); -void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, - struct kbase_jd_atom *target_katom, u32 sw_flags); -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); -void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom); - -void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); -int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); -int kbase_event_pending(struct kbase_context *ctx); -int kbase_event_init(struct kbase_context *kctx); -void kbase_event_close(struct kbase_context *kctx); -void kbase_event_cleanup(struct kbase_context *kctx); -void kbase_event_wakeup(struct kbase_context *kctx); - -/** - * kbasep_jit_alloc_validate() - Validate the JIT allocation info. - * - * @kctx: Pointer to the kbase context within which the JIT - * allocation is to be validated. - * @info: Pointer to struct @base_jit_alloc_info - * which is to be validated. - * @return: 0 if jit allocation is valid; negative error code otherwise - */ -int kbasep_jit_alloc_validate(struct kbase_context *kctx, - struct base_jit_alloc_info *info); -/** - * kbase_free_user_buffer() - Free memory allocated for struct - * @kbase_debug_copy_buffer. - * - * @buffer: Pointer to the memory location allocated for the object - * of the type struct @kbase_debug_copy_buffer. - */ -static inline void kbase_free_user_buffer( - struct kbase_debug_copy_buffer *buffer) -{ - struct page **pages = buffer->extres_pages; - int nr_pages = buffer->nr_extres_pages; - - if (pages) { - int i; - - for (i = 0; i < nr_pages; i++) { - struct page *pg = pages[i]; - - if (pg) - put_page(pg); - } - kfree(pages); - } -} - -/** - * kbase_mem_copy_from_extres_page() - Copy pages from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @extres_pages: Pointer to the pages which correspond to the external - * resources from which the copying will take place. - * @pages: Pointer to the pages to which the content is to be - * copied from the provided external resources. - * @nr_pages: Number of pages to copy. - * @target_page_nr: Number of target pages which will be used for copying. - * @offset: Offset into the target pages from which the copying - * is to be performed. - * @to_copy: Size of the chunk to be copied, in bytes. - */ -void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, - void *extres_page, struct page **pages, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset, size_t *to_copy); -/** - * kbase_mem_copy_from_extres() - Copy from external resources. - * - * @kctx: kbase context within which the copying is to take place. - * @buf_data: Pointer to the information about external resources: - * pages pertaining to the external resource, number of - * pages to copy. 
- */ -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data); -int kbase_process_soft_job(struct kbase_jd_atom *katom); -int kbase_prepare_soft_job(struct kbase_jd_atom *katom); -void kbase_finish_soft_job(struct kbase_jd_atom *katom); -void kbase_cancel_soft_job(struct kbase_jd_atom *katom); -void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); -void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); -#endif -int kbase_soft_event_update(struct kbase_context *kctx, - u64 event, - unsigned char new_status); - -bool kbase_replay_process(struct kbase_jd_atom *katom); - -void kbasep_soft_job_timeout_worker(struct timer_list *timer); -void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); - -void kbasep_as_do_poke(struct work_struct *work); - -/** Returns the name associated with a Mali exception code - * - * This function is called from the interrupt handler when a GPU fault occurs. - * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN. - * - * @param[in] kbdev The kbase device that the GPU fault occurred from. - * @param[in] exception_code exception code - * @return name associated with the exception code - */ -const char *kbase_exception_name(struct kbase_device *kbdev, - u32 exception_code); - -/** - * Check whether a system suspend is in progress, or has already been suspended - * - * The caller should ensure that either kbdev->pm.active_count_lock is held, or - * a dmb was executed recently (to ensure the value is most - * up-to-date). However, without a lock the value could change afterwards. - * - * @return false if a suspend is not in progress - * @return !=false otherwise - */ -static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) -{ - return kbdev->pm.suspending; -} - -/** - * kbase_pm_is_active - Determine whether the GPU is active - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * This takes into account whether there is an active context reference. - * - * Return: true if the GPU is active, false otherwise - */ -static inline bool kbase_pm_is_active(struct kbase_device *kbdev) -{ - return kbdev->pm.active_count > 0; -} - -/** - * Return the atom's ID, as was originally supplied by userspace in - * base_jd_atom_v2::atom_number - */ -static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - int result; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(katom); - KBASE_DEBUG_ASSERT(katom->kctx == kctx); - - result = katom - &kctx->jctx.atoms[0]; - KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); - return result; -} - -/** - * kbase_jd_atom_from_id - Return the atom structure for the given atom ID - * @kctx: Context pointer - * @id: ID of atom to retrieve - * - * Return: Pointer to struct kbase_jd_atom associated with the supplied ID - */ -static inline struct kbase_jd_atom *kbase_jd_atom_from_id( - struct kbase_context *kctx, int id) -{ - return &kctx->jctx.atoms[id]; -} - -/** - * Initialize the disjoint state - * - * The disjoint event count and state are both set to zero. - * - * Disjoint functions usage: - * - * The disjoint event count should be incremented whenever a disjoint event occurs. - * - * There are several cases which are regarded as disjoint behavior. 
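The atom-ID helpers above are plain pointer arithmetic over the context's atom array; the sketch below shows the round trip on a simplified table. All names here (demo_*, ATOM_COUNT) are illustrative stand-ins, not the driver's types.

#include <assert.h>
#include <stddef.h>

#define ATOM_COUNT 256   /* stand-in for BASE_JD_ATOM_COUNT */

struct demo_atom { int state; };
struct demo_jctx { struct demo_atom atoms[ATOM_COUNT]; };

static int demo_atom_id(struct demo_jctx *jctx, struct demo_atom *katom)
{
	return (int)(katom - &jctx->atoms[0]);  /* index by pointer difference */
}

static struct demo_atom *demo_atom_from_id(struct demo_jctx *jctx, int id)
{
	return &jctx->atoms[id];
}

int main(void)
{
	static struct demo_jctx jctx;
	struct demo_atom *katom = demo_atom_from_id(&jctx, 42);

	assert(demo_atom_id(&jctx, katom) == 42);
	return 0;
}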
Rather than just increment - * the counter during disjoint events we also increment the counter when jobs may be affected - * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. - * - * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying - * (as part of the replay workaround). Increasing the disjoint state also increases the count of - * disjoint events. - * - * The disjoint state is then used to increase the count of disjoint events during job submission - * and job completion. Any atom submitted or completed while the disjoint state is greater than - * zero is regarded as a disjoint event. - * - * The disjoint event counter is also incremented immediately whenever a job is soft stopped - * and during context creation. - * - * @param kbdev The kbase device - */ -void kbase_disjoint_init(struct kbase_device *kbdev); - -/** - * Increase the count of disjoint events - * called when a disjoint event has happened - * - * @param kbdev The kbase device - */ -void kbase_disjoint_event(struct kbase_device *kbdev); - -/** - * Increase the count of disjoint events only if the GPU is in a disjoint state - * - * This should be called when something happens which could be disjoint if the GPU - * is in a disjoint state. The state refcount keeps track of this. - * - * @param kbdev The kbase device - */ -void kbase_disjoint_event_potential(struct kbase_device *kbdev); - -/** - * Returns the count of disjoint events - * - * @param kbdev The kbase device - * @return the count of disjoint events - */ -u32 kbase_disjoint_event_get(struct kbase_device *kbdev); - -/** - * Increment the refcount state indicating that the GPU is in a disjoint state. - * - * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) - * eventually after the disjoint state has completed @ref kbase_disjoint_state_down - * should be called - * - * @param kbdev The kbase device - */ -void kbase_disjoint_state_up(struct kbase_device *kbdev); - -/** - * Decrement the refcount state - * - * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) - * - * Called after @ref kbase_disjoint_state_up once the disjoint state is over - * - * @param kbdev The kbase device - */ -void kbase_disjoint_state_down(struct kbase_device *kbdev); - -/** - * If a job is soft stopped and the number of contexts is >= this value - * it is reported as a disjoint event - */ -#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 - -#if !defined(UINT64_MAX) - #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) -#endif - -#if KBASE_TRACE_ENABLE -void kbasep_trace_debugfs_init(struct kbase_device *kbdev); - -#ifndef CONFIG_MALI_SYSTEM_TRACE -/** Add trace values about a job-slot - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0) - -/** Add trace values about a job-slot, with info - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_DEBUG not defined). 
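The disjoint API documented above reduces to two counters: a monotonically increasing event count and a state refcount, where "potential" events are only counted while the state refcount is non-zero and raising or lowering the state also counts as an event. A simplified, single-threaded model of that bookkeeping (the real driver keeps these on the device structure with the appropriate atomicity):

#include <stdio.h>

struct disjoint { unsigned long count; unsigned int state; };

static void disjoint_event(struct disjoint *d)           { d->count++; }
static void disjoint_event_potential(struct disjoint *d) { if (d->state) d->count++; }
static void disjoint_state_up(struct disjoint *d)        { d->state++; disjoint_event(d); }
static void disjoint_state_down(struct disjoint *d)      { d->state--; disjoint_event(d); }

int main(void)
{
        struct disjoint d = { 0, 0 };

        disjoint_event_potential(&d);   /* state == 0: not counted        */
        disjoint_state_up(&d);          /* enter disjoint state: counted  */
        disjoint_event_potential(&d);   /* state  > 0: counted            */
        disjoint_state_down(&d);        /* leave disjoint state: counted  */
        printf("disjoint events: %lu\n", d.count);      /* prints 3 */
        return 0;
}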
Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val) - -/** Add trace values about a ctx refcount - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0) -/** Add trace values about a ctx refcount, and info - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val) - -/** Add trace values (no slot or refcount) - * - * @note Any functions called through this macro will still be evaluated in - * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any - * functions called to get the parameters supplied to this macro must: - * - be static or static inline - * - must just return 0 and have no other statements present in the body. - */ -#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \ - kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ - 0, 0, 0, info_val) - -/** Clear the trace */ -#define KBASE_TRACE_CLEAR(kbdev) \ - kbasep_trace_clear(kbdev) - -/** Dump the slot trace */ -#define KBASE_TRACE_DUMP(kbdev) \ - kbasep_trace_dump(kbdev) - -/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */ -void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val); -/** PRIVATE - do not use directly. 
Use KBASE_TRACE_CLEAR() instead */ -void kbasep_trace_clear(struct kbase_device *kbdev); -#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ -/* Dispatch kbase trace events as system trace events */ -#include -#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ - trace_mali_##code(jobslot, 0) - -#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ - trace_mali_##code(jobslot, info_val) - -#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ - trace_mali_##code(refcount, 0) - -#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ - trace_mali_##code(refcount, info_val) - -#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\ - trace_mali_##code(gpu_addr, info_val) - -#define KBASE_TRACE_CLEAR(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) -#define KBASE_TRACE_DUMP(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) - -#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ -#else -#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - } while (0) - -#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(refcount);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(subcode);\ - CSTD_UNUSED(ctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_TRACE_CLEAR(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) -#define KBASE_TRACE_DUMP(kbdev)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(0);\ - } while (0) -#endif /* KBASE_TRACE_ENABLE */ -/** PRIVATE - do not use directly. 
Use KBASE_TRACE_DUMP() instead */ -void kbasep_trace_dump(struct kbase_device *kbdev); - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -/* kbase_io_history_init - initialize data struct for register access history - * - * @kbdev The register history to initialize - * @n The number of register accesses that the buffer could hold - * - * @return 0 if successfully initialized, failure otherwise - */ -int kbase_io_history_init(struct kbase_io_history *h, u16 n); - -/* kbase_io_history_term - uninit all resources for the register access history - * - * @h The register history to terminate - */ -void kbase_io_history_term(struct kbase_io_history *h); - -/* kbase_io_history_dump - print the register history to the kernel ring buffer - * - * @kbdev Pointer to kbase_device containing the register history to dump - */ -void kbase_io_history_dump(struct kbase_device *kbdev); - -/** - * kbase_io_history_resize - resize the register access history buffer. - * - * @h: Pointer to a valid register history to resize - * @new_size: Number of accesses the buffer could hold - * - * A successful resize will clear all recent register accesses. - * If resizing fails for any reason (e.g., could not allocate memory, invalid - * buffer size) then the original buffer will be kept intact. - * - * @return 0 if the buffer was resized, failure otherwise - */ -int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); - -#else /* CONFIG_DEBUG_FS */ - -#define kbase_io_history_init(...) ((int)0) - -#define kbase_io_history_term CSTD_NOP - -#define kbase_io_history_dump CSTD_NOP - -#define kbase_io_history_resize CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ - - -#endif - -extern int meson_gpu_data_invalid_count; -extern int meson_gpu_fault; diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_10969_workaround.c deleted file mode 100755 index 8d71926ea575..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_10969_workaround.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2015,2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
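The register-access history above is, in effect, a bounded circular log that debugfs can dump and resize, where a successful resize deliberately drops everything recorded so far and a failed resize leaves the old buffer untouched. A generic userspace model of that behaviour; the per-entry address/value layout and the helper names are assumptions for illustration, not the kbase structures:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct io_entry { uint32_t addr; uint32_t value; };

struct io_history {
        struct io_entry *buf;
        uint16_t size;          /* capacity */
        uint16_t count;         /* entries recorded (capped at size) */
        uint16_t next;          /* next slot to overwrite */
};

static int history_init(struct io_history *h, uint16_t n)
{
        h->buf = calloc(n, sizeof(*h->buf));
        if (!h->buf)
                return -1;
        h->size = n;
        h->count = h->next = 0;
        return 0;
}

static void history_add(struct io_history *h, uint32_t addr, uint32_t value)
{
        h->buf[h->next].addr = addr;
        h->buf[h->next].value = value;
        h->next = (uint16_t)((h->next + 1) % h->size);
        if (h->count < h->size)
                h->count++;
}

static int history_resize(struct io_history *h, uint16_t new_size)
{
        struct io_entry *nbuf = calloc(new_size, sizeof(*nbuf));

        if (!nbuf)
                return -1;      /* keep the old buffer intact on failure */
        free(h->buf);
        h->buf = nbuf;
        h->size = new_size;
        h->count = h->next = 0; /* a successful resize clears the history */
        return 0;
}

int main(void)
{
        struct io_history h;
        uint16_t i;

        if (history_init(&h, 4))
                return 1;
        history_add(&h, 0x0034, 0x00000001);
        history_add(&h, 0x0030, 0x000000ff);
        for (i = 0; i < h.count; i++)
                printf("reg 0x%04x = 0x%08x\n",
                       (unsigned)h.buf[i].addr, (unsigned)h.buf[i].value);
        history_resize(&h, 8);  /* history is empty again from here on */
        free(h.buf);
        return 0;
}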
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include -#include -#include - -/* Mask of X and Y coordinates for the coordinates words in the descriptors*/ -#define X_COORDINATE_MASK 0x00000FFF -#define Y_COORDINATE_MASK 0x0FFF0000 -/* Max number of words needed from the fragment shader job descriptor */ -#define JOB_HEADER_SIZE_IN_WORDS 10 -#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32)) - -/* Word 0: Status Word */ -#define JOB_DESC_STATUS_WORD 0 -/* Word 1: Restart Index */ -#define JOB_DESC_RESTART_INDEX_WORD 1 -/* Word 2: Fault address low word */ -#define JOB_DESC_FAULT_ADDR_LOW_WORD 2 -/* Word 8: Minimum Tile Coordinates */ -#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8 -/* Word 9: Maximum Tile Coordinates */ -#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9 - -int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom) -{ - struct device *dev = katom->kctx->kbdev->dev; - u32 clamped = 0; - struct kbase_va_region *region; - struct tagged_addr *page_array; - u64 page_index; - u32 offset = katom->jc & (~PAGE_MASK); - u32 *page_1 = NULL; - u32 *page_2 = NULL; - u32 job_header[JOB_HEADER_SIZE_IN_WORDS]; - void *dst = job_header; - u32 minX, minY, maxX, maxY; - u32 restartX, restartY; - struct page *p; - u32 copy_size; - - dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n"); - if (!(katom->core_req & BASE_JD_REQ_FS)) - return 0; - - kbase_gpu_vm_lock(katom->kctx); - region = kbase_region_tracker_find_region_enclosing_address(katom->kctx, - katom->jc); - if (!region || (region->flags & KBASE_REG_FREE)) - goto out_unlock; - - page_array = kbase_get_cpu_phy_pages(region); - if (!page_array) - goto out_unlock; - - page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn; - - p = as_page(page_array[page_index]); - - /* we need the first 10 words of the fragment shader job descriptor. - * We need to check that the offset + 10 words is less that the page - * size otherwise we need to load the next page. - * page_size_overflow will be equal to 0 in case the whole descriptor - * is within the page > 0 otherwise. 
- */ - copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE); - - page_1 = kmap_atomic(p); - - /* page_1 is a u32 pointer, offset is expressed in bytes */ - page_1 += offset>>2; - - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(p) + offset, - copy_size, DMA_BIDIRECTIONAL); - - memcpy(dst, page_1, copy_size); - - /* The data needed overflows page the dimension, - * need to map the subsequent page */ - if (copy_size < JOB_HEADER_SIZE) { - p = as_page(page_array[page_index + 1]); - page_2 = kmap_atomic(p); - - kbase_sync_single_for_cpu(katom->kctx->kbdev, - kbase_dma_addr(p), - JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL); - - memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size); - } - - /* We managed to correctly map one or two pages (in case of overflow) */ - /* Get Bounding Box data and restart index from fault address low word */ - minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK; - minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK; - maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK; - maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK; - restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK; - restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK; - - dev_warn(dev, "Before Clamping:\n" - "Jobstatus: %08x\n" - "restartIdx: %08x\n" - "Fault_addr_low: %08x\n" - "minCoordsX: %08x minCoordsY: %08x\n" - "maxCoordsX: %08x maxCoordsY: %08x\n", - job_header[JOB_DESC_STATUS_WORD], - job_header[JOB_DESC_RESTART_INDEX_WORD], - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], - minX, minY, - maxX, maxY); - - /* Set the restart index to the one which generated the fault*/ - job_header[JOB_DESC_RESTART_INDEX_WORD] = - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD]; - - if (restartX < minX) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY; - dev_warn(dev, - "Clamping restart X index to minimum. %08x clamped to %08x\n", - restartX, minX); - clamped = 1; - } - if (restartY < minY) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX; - dev_warn(dev, - "Clamping restart Y index to minimum. %08x clamped to %08x\n", - restartY, minY); - clamped = 1; - } - if (restartX > maxX) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY; - dev_warn(dev, - "Clamping restart X index to maximum. %08x clamped to %08x\n", - restartX, maxX); - clamped = 1; - } - if (restartY > maxY) { - job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX; - dev_warn(dev, - "Clamping restart Y index to maximum. 
%08x clamped to %08x\n", - restartY, maxY); - clamped = 1; - } - - if (clamped) { - /* Reset the fault address low word - * and set the job status to STOPPED */ - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0; - job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED; - dev_warn(dev, "After Clamping:\n" - "Jobstatus: %08x\n" - "restartIdx: %08x\n" - "Fault_addr_low: %08x\n" - "minCoordsX: %08x minCoordsY: %08x\n" - "maxCoordsX: %08x maxCoordsY: %08x\n", - job_header[JOB_DESC_STATUS_WORD], - job_header[JOB_DESC_RESTART_INDEX_WORD], - job_header[JOB_DESC_FAULT_ADDR_LOW_WORD], - minX, minY, - maxX, maxY); - - /* Flush CPU cache to update memory for future GPU reads*/ - memcpy(page_1, dst, copy_size); - p = as_page(page_array[page_index]); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(p) + offset, - copy_size, DMA_TO_DEVICE); - - if (copy_size < JOB_HEADER_SIZE) { - memcpy(page_2, dst + copy_size, - JOB_HEADER_SIZE - copy_size); - p = as_page(page_array[page_index + 1]); - - kbase_sync_single_for_device(katom->kctx->kbdev, - kbase_dma_addr(p), - JOB_HEADER_SIZE - copy_size, - DMA_TO_DEVICE); - } - } - if (copy_size < JOB_HEADER_SIZE) - kunmap_atomic(page_2); - - kunmap_atomic(page_1); - -out_unlock: - kbase_gpu_vm_unlock(katom->kctx); - return clamped; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_10969_workaround.h deleted file mode 100755 index 379a05a1a128..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_10969_workaround.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2014, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_10969_WORKAROUND_ -#define _KBASE_10969_WORKAROUND_ - -/** - * kbasep_10969_workaround_clamp_coordinates - Apply the WA to clamp the restart indices - * @katom: atom representing the fragment job for which the WA has to be applied - * - * This workaround is used to solve an HW issue with single iterator GPUs. - * If a fragment job is soft-stopped on the edge of its bounding box, it can happen - * that the restart index is out of bounds and the rerun causes a tile range - * fault. If this happens we try to clamp the restart index to a correct value. - */ -int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom); - -#endif /* _KBASE_10969_WORKAROUND_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_as_fault_debugfs.c deleted file mode 100755 index 4cc93a95a456..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_as_fault_debugfs.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
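The workaround above pulls an out-of-range restart index back inside the fragment job's bounding box, using the X and Y coordinate masks to treat the two packed fields of a coordinate word independently. A simplified, self-contained sketch of that clamping rule; the real code edits the mapped job header in place and reports each clamp with dev_warn():

#include <stdint.h>
#include <stdio.h>

#define X_COORDINATE_MASK 0x00000FFFu   /* bits 11..0  */
#define Y_COORDINATE_MASK 0x0FFF0000u   /* bits 27..16 */

static uint32_t clamp_restart(uint32_t restart, uint32_t min_c, uint32_t max_c)
{
        uint32_t x = restart & X_COORDINATE_MASK, y = restart & Y_COORDINATE_MASK;
        uint32_t min_x = min_c & X_COORDINATE_MASK, min_y = min_c & Y_COORDINATE_MASK;
        uint32_t max_x = max_c & X_COORDINATE_MASK, max_y = max_c & Y_COORDINATE_MASK;

        if (x < min_x) x = min_x;
        if (x > max_x) x = max_x;
        if (y < min_y) y = min_y;
        if (y > max_y) y = max_y;

        return x | y;           /* repacked restart index inside the box */
}

int main(void)
{
        /* restart at (2000, 5) against a bounding box of (16..1024, 16..768) */
        uint32_t restart = (5u << 16) | 2000u;
        uint32_t min_c   = (16u << 16) | 16u;
        uint32_t max_c   = (768u << 16) | 1024u;

        printf("clamped restart: 0x%08x\n",
               (unsigned)clamp_restart(restart, min_c, max_c));
        return 0;                               /* prints 0x00100400 */
}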
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include - -#include -#include - -#ifdef CONFIG_DEBUG_FS -#ifdef CONFIG_MALI_DEBUG - -static int kbase_as_fault_read(struct seq_file *sfile, void *data) -{ - uintptr_t as_no = (uintptr_t) sfile->private; - - struct list_head *entry; - const struct list_head *kbdev_list; - struct kbase_device *kbdev = NULL; - - kbdev_list = kbase_dev_list_get(); - - list_for_each(entry, kbdev_list) { - kbdev = list_entry(entry, struct kbase_device, entry); - - if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { - - /* don't show this one again until another fault occors */ - kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); - - /* output the last page fault addr */ - seq_printf(sfile, "%llu\n", - (u64) kbdev->as[as_no].pf_data.addr); - } - - } - - kbase_dev_list_put(kbdev_list); - - return 0; -} - -static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) -{ - return single_open(file, kbase_as_fault_read, in->i_private); -} - -static const struct file_operations as_fault_fops = { - .open = kbase_as_fault_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -#endif /* CONFIG_MALI_DEBUG */ -#endif /* CONFIG_DEBUG_FS */ - -/* - * Initialize debugfs entry for each address space - */ -void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) -{ -#ifdef CONFIG_DEBUG_FS -#ifdef CONFIG_MALI_DEBUG - uint i; - char as_name[64]; - struct dentry *debugfs_directory; - - kbdev->debugfs_as_read_bitmap = 0ULL; - - KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); - KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); - - debugfs_directory = debugfs_create_dir("address_spaces", - kbdev->mali_debugfs_directory); - - if (debugfs_directory) { - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); - debugfs_create_file(as_name, S_IRUGO, - debugfs_directory, - (void *)(uintptr_t)i, - &as_fault_fops); - } - } else { - dev_warn(kbdev->dev, - "unable to create address_spaces debugfs directory"); - } - -#endif /* CONFIG_MALI_DEBUG */ -#endif /* CONFIG_DEBUG_FS */ - return; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_as_fault_debugfs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_as_fault_debugfs.h deleted file mode 100755 index 496d8b17f240..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_as_fault_debugfs.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_AS_FAULT_DEBUG_FS_H -#define _KBASE_AS_FAULT_DEBUG_FS_H - -/** - * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults - * - * @kbdev: Pointer to kbase_device - */ -void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); - -/** - * kbase_as_fault_debugfs_new() - make the last fault available on debugfs - * - * @kbdev: Pointer to kbase_device - * @as_no: The address space the fault occurred on - */ -static inline void -kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) -{ -#ifdef CONFIG_DEBUG_FS -#ifdef CONFIG_MALI_DEBUG - kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); -#endif /* CONFIG_DEBUG_FS */ -#endif /* CONFIG_MALI_DEBUG */ - return; -} - -#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_cache_policy.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_cache_policy.c deleted file mode 100755 index 27a03cf02138..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_cache_policy.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Cache Policy API. - */ - -#include "mali_kbase_cache_policy.h" - -/* - * The output flags should be a combination of the following values: - * KBASE_REG_CPU_CACHED: CPU cache should be enabled - * KBASE_REG_GPU_CACHED: GPU cache should be enabled - * - * NOTE: Some components within the GPU might only be able to access memory - * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for - * more details. 
- */ -u32 kbase_cache_enabled(u32 flags, u32 nr_pages) -{ - u32 cache_flags = 0; - - CSTD_UNUSED(nr_pages); - - if (!(flags & BASE_MEM_UNCACHED_GPU)) - cache_flags |= KBASE_REG_GPU_CACHED; - - if (flags & BASE_MEM_CACHED_CPU) - cache_flags |= KBASE_REG_CPU_CACHED; - - return cache_flags; -} - - -void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - dma_sync_single_for_device(kbdev->dev, handle, size, dir); -} - - -void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_cache_policy.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_cache_policy.h deleted file mode 100755 index 8a1e5291bf5f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_cache_policy.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Cache Policy API. - */ - -#ifndef _KBASE_CACHE_POLICY_H_ -#define _KBASE_CACHE_POLICY_H_ - -#include "mali_kbase.h" -#include "mali_base_kernel.h" - -/** - * kbase_cache_enabled - Choose the cache policy for a specific region - * @flags: flags describing attributes of the region - * @nr_pages: total number of pages (backed or not) for the region - * - * Tells whether the CPU and GPU caches should be enabled or not for a specific - * region. - * This function can be modified to customize the cache policy depending on the - * flags and size of the region. - * - * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED - * depending on the cache policy - */ -u32 kbase_cache_enabled(u32 flags, u32 nr_pages); - -#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_config.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_config.c deleted file mode 100755 index ce7070d1d634..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_config.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include -#include -#include - -int kbasep_platform_device_init(struct kbase_device *kbdev) -{ - struct kbase_platform_funcs_conf *platform_funcs_p; - - platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; - if (platform_funcs_p && platform_funcs_p->platform_init_func) - return platform_funcs_p->platform_init_func(kbdev); - - return 0; -} - -void kbasep_platform_device_term(struct kbase_device *kbdev) -{ - struct kbase_platform_funcs_conf *platform_funcs_p; - - platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; - if (platform_funcs_p && platform_funcs_p->platform_term_func) - platform_funcs_p->platform_term_func(kbdev); -} - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_config.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_config.h deleted file mode 100755 index 1637fcbc4d29..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_config.h +++ /dev/null @@ -1,299 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_config.h - * Configuration API and Attributes for KBase - */ - -#ifndef _KBASE_CONFIG_H_ -#define _KBASE_CONFIG_H_ - -#include -#include -#include -#include - -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_config Configuration API and Attributes - * @{ - */ - -/* Forward declaration of struct kbase_device */ -struct kbase_device; - -/** - * kbase_platform_funcs_conf - Specifies platform init/term function pointers - * - * Specifies the functions pointers for platform specific initialization and - * termination. By default no functions are required. No additional platform - * specific control is necessary. - */ -struct kbase_platform_funcs_conf { - /** - * platform_init_func - platform specific init function pointer - * @kbdev - kbase_device pointer - * - * Returns 0 on success, negative error code otherwise. - * - * Function pointer for platform specific initialization or NULL if no - * initialization function is required. At the point this the GPU is - * not active and its power and clocks are in unknown (platform specific - * state) as kbase doesn't yet have control of power and clocks. - * - * The platform specific private pointer kbase_device::platform_context - * can be accessed (and possibly initialized) in here. 
- */ - int (*platform_init_func)(struct kbase_device *kbdev); - /** - * platform_term_func - platform specific termination function pointer - * @kbdev - kbase_device pointer - * - * Function pointer for platform specific termination or NULL if no - * termination function is required. At the point this the GPU will be - * idle but still powered and clocked. - * - * The platform specific private pointer kbase_device::platform_context - * can be accessed (and possibly terminated) in here. - */ - void (*platform_term_func)(struct kbase_device *kbdev); -}; - -/* - * @brief Specifies the callbacks for power management - * - * By default no callbacks will be made and the GPU must not be powered off. - */ -struct kbase_pm_callback_conf { - /** Callback for when the GPU is idle and the power to it can be switched off. - * - * The system integrator can decide whether to either do nothing, just switch off - * the clocks to the GPU, or to completely power down the GPU. - * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). - */ - void (*power_off_callback)(struct kbase_device *kbdev); - - /** Callback for when the GPU is about to become active and power must be supplied. - * - * This function must not return until the GPU is powered and clocked sufficiently for register access to - * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. - * If the GPU state has been lost then this function must return 1, otherwise it should return 0. - * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). - * - * The return value of the first call to this function is ignored. - * - * @return 1 if the GPU state may have been lost, 0 otherwise. - */ - int (*power_on_callback)(struct kbase_device *kbdev); - - /** Callback for when the system is requesting a suspend and GPU power - * must be switched off. - * - * Note that if this callback is present, then this may be called - * without a preceding call to power_off_callback. Therefore this - * callback must be able to take any action that might otherwise happen - * in power_off_callback. - * - * The platform specific private pointer kbase_device::platform_context - * can be accessed and modified in here. It is the platform \em - * callbacks responsibility to initialize and terminate this pointer if - * used (see @ref kbase_platform_funcs_conf). - */ - void (*power_suspend_callback)(struct kbase_device *kbdev); - - /** Callback for when the system is resuming from a suspend and GPU - * power must be switched on. - * - * Note that if this callback is present, then this may be called - * without a following call to power_on_callback. Therefore this - * callback must be able to take any action that might otherwise happen - * in power_on_callback. - * - * The platform specific private pointer kbase_device::platform_context - * can be accessed and modified in here. It is the platform \em - * callbacks responsibility to initialize and terminate this pointer if - * used (see @ref kbase_platform_funcs_conf). - */ - void (*power_resume_callback)(struct kbase_device *kbdev); - - /** Callback for handling runtime power management initialization. 
- * - * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback - * will become active from calls made to the OS from within this function. - * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. - * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. - * - * @return 0 on success, else int error code. - */ - int (*power_runtime_init_callback)(struct kbase_device *kbdev); - - /** Callback for handling runtime power management termination. - * - * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback - * should no longer be called by the OS on completion of this function. - * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. - */ - void (*power_runtime_term_callback)(struct kbase_device *kbdev); - - /** Callback for runtime power-off power management callback - * - * For linux this callback will be called by the kernel runtime_suspend callback. - * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. - * - * @return 0 on success, else OS error code. - */ - void (*power_runtime_off_callback)(struct kbase_device *kbdev); - - /** Callback for runtime power-on power management callback - * - * For linux this callback will be called by the kernel runtime_resume callback. - * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. - */ - int (*power_runtime_on_callback)(struct kbase_device *kbdev); - - /* - * Optional callback for checking if GPU can be suspended when idle - * - * This callback will be called by the runtime power management core - * when the reference count goes to 0 to provide notification that the - * GPU now seems idle. - * - * If this callback finds that the GPU can't be powered off, or handles - * suspend by powering off directly or queueing up a power off, a - * non-zero value must be returned to prevent the runtime PM core from - * also triggering a suspend. - * - * Returning 0 will cause the runtime PM core to conduct a regular - * autosuspend. - * - * This callback is optional and if not provided regular autosuspend - * will be triggered. - * - * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use - * this feature. - * - * Return 0 if GPU can be suspended, positive value if it can not be - * suspeneded by runtime PM, else OS error code - */ - int (*power_runtime_idle_callback)(struct kbase_device *kbdev); -}; - -#ifdef CONFIG_OF -struct kbase_platform_config { -}; -#else - -/* - * @brief Specifies start and end of I/O memory region. - */ -struct kbase_io_memory_region { - u64 start; - u64 end; -}; - -/* - * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. - */ -struct kbase_io_resources { - u32 job_irq_number; - u32 mmu_irq_number; - u32 gpu_irq_number; - struct kbase_io_memory_region io_memory_region; -}; - -struct kbase_platform_config { - const struct kbase_io_resources *io_resources; -}; - -#endif /* CONFIG_OF */ - -/** - * @brief Gets the pointer to platform config. - * - * @return Pointer to the platform config - */ -struct kbase_platform_config *kbase_get_platform_config(void); - -/** - * kbasep_platform_device_init: - Platform specific call to initialize hardware - * @kbdev: kbase device pointer - * - * Function calls a platform defined routine if specified in the configuration - * attributes. 
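The kbase_pm_callback_conf block above is the hook a platform uses to plug its power sequencing into kbase: power_off_callback may gate clocks or cut power entirely, and power_on_callback reports, by returning 1, that GPU state may have been lost in between. A hypothetical, self-contained stub in that shape; struct kbase_device is only forward-declared here, and the demo_* names are inventions for illustration rather than a real platform file:

struct kbase_device;

struct pm_callback_conf_sketch {
        void (*power_off_callback)(struct kbase_device *kbdev);
        int  (*power_on_callback)(struct kbase_device *kbdev);
};

static void demo_power_off(struct kbase_device *kbdev)
{
        (void)kbdev;            /* a real platform would gate clocks/power here */
}

static int demo_power_on(struct kbase_device *kbdev)
{
        (void)kbdev;            /* a real platform would restore clocks/power here */
        return 1;               /* 1: GPU state may have been lost since power-off */
}

static const struct pm_callback_conf_sketch demo_pm_callbacks = {
        .power_off_callback = demo_power_off,
        .power_on_callback  = demo_power_on,
};

int main(void)
{
        struct kbase_device *dev = 0;   /* stand-in pointer, never dereferenced */

        demo_pm_callbacks.power_off_callback(dev);
        return demo_pm_callbacks.power_on_callback(dev) ? 0 : 1;
}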
The routine can initialize any hardware and context state that - * is required for the GPU block to function. - * - * Return: 0 if no errors have been found in the config. - * Negative error code otherwise. - */ -int kbasep_platform_device_init(struct kbase_device *kbdev); - -/** - * kbasep_platform_device_term - Platform specific call to terminate hardware - * @kbdev: Kbase device pointer - * - * Function calls a platform defined routine if specified in the configuration - * attributes. The routine can destroy any platform specific context state and - * shut down any hardware functionality that are outside of the Power Management - * callbacks. - * - */ -void kbasep_platform_device_term(struct kbase_device *kbdev); - -#ifndef CONFIG_OF -/** - * kbase_platform_register - Register a platform device for the GPU - * - * This can be used to register a platform device on systems where device tree - * is not enabled and the platform initialisation code in the kernel doesn't - * create the GPU device. Where possible device tree should be used instead. - * - * Return: 0 for success, any other fail causes module initialisation to fail - */ -int kbase_platform_register(void); - -/** - * kbase_platform_unregister - Unregister a fake platform device - * - * Unregister the platform device created with kbase_platform_register() - */ -void kbase_platform_unregister(void); -#endif - - /** @} *//* end group kbase_config */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_CONFIG_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_config_defaults.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_config_defaults.h deleted file mode 100755 index bb2ab53f06f1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_config_defaults.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file mali_kbase_config_defaults.h - * - * Default values for configuration settings - * - */ - -#ifndef _KBASE_CONFIG_DEFAULTS_H_ -#define _KBASE_CONFIG_DEFAULTS_H_ - -/* Include mandatory definitions per platform */ -#include - -/** -* Boolean indicating whether the driver is configured to be secure at -* a potential loss of performance. -* -* This currently affects only r0p0-15dev0 HW and earlier. -* -* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and -* performance: -* -* - When this is set to true, the driver remains fully secure, -* but potentially loses performance compared with setting this to -* false. -* - When set to false, the driver is open to certain security -* attacks. -* -* From r0p0-00rel0 and onwards, there is no security loss by setting -* this to false, and no performance loss by setting it to -* true. 
-*/ -#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false - -enum { - /** - * Use unrestricted Address ID width on the AXI bus. - */ - KBASE_AID_32 = 0x0, - - /** - * Restrict GPU to a half of maximum Address ID count. - * This will reduce performance, but reduce bus load due to GPU. - */ - KBASE_AID_16 = 0x3, - - /** - * Restrict GPU to a quarter of maximum Address ID count. - * This will reduce performance, but reduce bus load due to GPU. - */ - KBASE_AID_8 = 0x2, - - /** - * Restrict GPU to an eighth of maximum Address ID count. - * This will reduce performance, but reduce bus load due to GPU. - */ - KBASE_AID_4 = 0x1 -}; - -enum { - /** - * Use unrestricted Address ID width on the AXI bus. - * Restricting ID width will reduce performance & bus load due to GPU. - */ - KBASE_3BIT_AID_32 = 0x0, - - /* Restrict GPU to 7/8 of maximum Address ID count. */ - KBASE_3BIT_AID_28 = 0x1, - - /* Restrict GPU to 3/4 of maximum Address ID count. */ - KBASE_3BIT_AID_24 = 0x2, - - /* Restrict GPU to 5/8 of maximum Address ID count. */ - KBASE_3BIT_AID_20 = 0x3, - - /* Restrict GPU to 1/2 of maximum Address ID count. */ - KBASE_3BIT_AID_16 = 0x4, - - /* Restrict GPU to 3/8 of maximum Address ID count. */ - KBASE_3BIT_AID_12 = 0x5, - - /* Restrict GPU to 1/4 of maximum Address ID count. */ - KBASE_3BIT_AID_8 = 0x6, - - /* Restrict GPU to 1/8 of maximum Address ID count. */ - KBASE_3BIT_AID_4 = 0x7 -}; - -/** - * Default setting for read Address ID limiting on AXI bus. - * - * Attached value: u32 register value - * KBASE_AID_32 - use the full 32 IDs (5 ID bits) - * KBASE_AID_16 - use 16 IDs (4 ID bits) - * KBASE_AID_8 - use 8 IDs (3 ID bits) - * KBASE_AID_4 - use 4 IDs (2 ID bits) - * Default value: KBASE_AID_32 (no limit). Note hardware implementation - * may limit to a lower value. - */ -#define DEFAULT_ARID_LIMIT KBASE_AID_32 - -/** - * Default setting for write Address ID limiting on AXI. - * - * Attached value: u32 register value - * KBASE_AID_32 - use the full 32 IDs (5 ID bits) - * KBASE_AID_16 - use 16 IDs (4 ID bits) - * KBASE_AID_8 - use 8 IDs (3 ID bits) - * KBASE_AID_4 - use 4 IDs (2 ID bits) - * Default value: KBASE_AID_32 (no limit). Note hardware implementation - * may limit to a lower value. - */ -#define DEFAULT_AWID_LIMIT KBASE_AID_32 - -/** - * Default setting for read Address ID limiting on AXI bus. - * - * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation - * may limit to a lower value. - */ -#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32 - -/** - * Default setting for write Address ID limiting on AXI. - * - * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation - * may limit to a lower value. - */ -#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32 - -/** - * Default period for DVFS sampling - */ -#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ - -/** - * Power Management poweroff tick granuality. This is in nanoseconds to - * allow HR timer support. - * - * On each scheduling tick, the power manager core may decide to: - * -# Power off one or more shader cores - * -# Power off the entire GPU - */ -#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ - -/** - * Power Manager number of ticks before shader cores are powered off - */ -#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ - -/** - * Default scheduling tick granuality - */ -#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ - -/** - * Default minimum number of scheduling ticks before jobs are soft-stopped. 
- * - * This defines the time-slice for a job (which may be different from that of a - * context) - */ -#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ - -/** - * Default minimum number of scheduling ticks before CL jobs are soft-stopped. - */ -#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ - -/** - * Default minimum number of scheduling ticks before jobs are hard-stopped - */ -#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ -#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */ - -/** - * Default minimum number of scheduling ticks before CL jobs are hard-stopped. - */ -#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ - -/** - * Default minimum number of scheduling ticks before jobs are hard-stopped - * during dumping - */ -#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ - -/** - * Default timeout for some software jobs, after which the software event wait - * jobs will be cancelled. - */ -#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ - -/** - * Default minimum number of scheduling ticks before the GPU is reset to clear a - * "stuck" job - */ -#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ -#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */ - -/** - * Default minimum number of scheduling ticks before the GPU is reset to clear a - * "stuck" CL job. - */ -#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ - -/** - * Default minimum number of scheduling ticks before the GPU is reset to clear a - * "stuck" job during dumping. - */ -#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ - -/** - * Default number of milliseconds given for other jobs on the GPU to be - * soft-stopped when the GPU needs to be reset. - */ -#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ - -/** - * Default timeslice that a context is scheduled in for, in nanoseconds. - * - * When a context has used up this amount of time across its jobs, it is - * scheduled out to let another run. - * - * @note the resolution is nanoseconds (ns) here, because that's the format - * often used by the OS. - */ -#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ - -/** - * Perform GPU power down using only platform specific code, skipping DDK power - * management. - * - * If this is non-zero then kbase will avoid powering down shader cores, the - * tiler, and the L2 cache, instead just powering down the entire GPU through - * platform specific code. This may be required for certain platform - * integrations. - * - * Note that as this prevents kbase from powering down shader cores, this limits - * the available power policies to coarse_demand and always_on. - */ -#define PLATFORM_POWER_DOWN_ONLY (0) - -/** - * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms - * this isn't available, so we simply define a dummy value here. If devfreq - * is enabled the value will be read from there, otherwise this should be - * overridden by defining GPU_FREQ_KHZ_MAX in the platform file. - */ -#define DEFAULT_GPU_FREQ_KHZ_MAX (5000) - -#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_context.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_context.c deleted file mode 100755 index 59609d7e5c5d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_context.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
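The tick-based defaults above only make sense alongside DEFAULT_JS_SCHEDULING_PERIOD_NS: every timeout is expressed in scheduling ticks, so the wall-clock bound quoted in each comment is simply ticks multiplied by the 100 ms period. A quick arithmetic check of those figures, with the values copied from the defines above:

#include <stdio.h>

#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u)    /* 100ms */
#define DEFAULT_JS_SOFT_STOP_TICKS      (1)
#define DEFAULT_JS_HARD_STOP_TICKS_SS   (50)
#define DEFAULT_JS_RESET_TICKS_SS       (55)

static unsigned int ticks_to_ms(unsigned int ticks)
{
        return ticks * (DEFAULT_JS_SCHEDULING_PERIOD_NS / 1000000u);
}

int main(void)
{
        printf("soft stop: ~%u ms\n", ticks_to_ms(DEFAULT_JS_SOFT_STOP_TICKS));    /* 100  */
        printf("hard stop: ~%u ms\n", ticks_to_ms(DEFAULT_JS_HARD_STOP_TICKS_SS)); /* 5000 */
        printf("gpu reset: ~%u ms\n", ticks_to_ms(DEFAULT_JS_RESET_TICKS_SS));     /* 5500 */
        return 0;
}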
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel context APIs - */ - -#include -#include -#include -#include -#include - -struct kbase_context * -kbase_create_context(struct kbase_device *kbdev, bool is_compat) -{ - struct kbase_context *kctx; - int err; - struct page *p; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - /* zero-inited as lot of code assume it's zero'ed out on create */ - kctx = vzalloc(sizeof(*kctx)); - - if (!kctx) - goto out; - - /* creating a context is considered a disjoint event */ - kbase_disjoint_event(kbdev); - - kctx->kbdev = kbdev; - kctx->as_nr = KBASEP_AS_NR_INVALID; - atomic_set(&kctx->refcount, 0); - if (is_compat) - kbase_ctx_flag_set(kctx, KCTX_COMPAT); -#if defined(CONFIG_64BIT) - else - kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -#endif /* !defined(CONFIG_64BIT) */ - - atomic_set(&kctx->setup_complete, 0); - atomic_set(&kctx->setup_in_progress, 0); - spin_lock_init(&kctx->mm_update_lock); - kctx->process_mm = NULL; - atomic_set(&kctx->nonmapped_pages, 0); - kctx->slots_pullable = 0; - kctx->tgid = current->tgid; - kctx->pid = current->pid; - - err = kbase_mem_pool_init(&kctx->mem_pool, - kbdev->mem_pool_max_size_default, - KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, - kctx->kbdev, - &kbdev->mem_pool); - if (err) - goto free_kctx; - - err = kbase_mem_pool_init(&kctx->lp_mem_pool, - (kbdev->mem_pool_max_size_default >> 9), - KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, - kctx->kbdev, - &kbdev->lp_mem_pool); - if (err) - goto free_mem_pool; - - err = kbase_mem_evictable_init(kctx); - if (err) - goto free_both_pools; - - atomic_set(&kctx->used_pages, 0); - - err = kbase_jd_init(kctx); - if (err) - goto deinit_evictable; - - err = kbasep_js_kctx_init(kctx); - if (err) - goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ - - err = kbase_event_init(kctx); - if (err) - goto free_jd; - - - atomic_set(&kctx->drain_pending, 0); - - mutex_init(&kctx->reg_lock); - - spin_lock_init(&kctx->mem_partials_lock); - INIT_LIST_HEAD(&kctx->mem_partials); - - INIT_LIST_HEAD(&kctx->waiting_soft_jobs); - spin_lock_init(&kctx->waiting_soft_jobs_lock); - err = kbase_dma_fence_init(kctx); - if (err) - goto free_kcpu_wq; - - err = kbase_mmu_init(kbdev, &kctx->mmu, kctx); - if (err) - goto term_dma_fence; - - p = kbase_mem_alloc_page(&kctx->mem_pool); - if (!p) - goto no_sink_page; - kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); - - init_waitqueue_head(&kctx->event_queue); - - kctx->cookies = KBASE_COOKIE_MASK; - - - /* Make sure page 0 is not used... 
*/ - err = kbase_region_tracker_init(kctx); - if (err) - goto no_region_tracker; - - err = kbase_sticky_resource_init(kctx); - if (err) - goto no_sticky; - - err = kbase_jit_init(kctx); - if (err) - goto no_jit; -#ifdef CONFIG_GPU_TRACEPOINTS - atomic_set(&kctx->jctx.work_id, 0); -#endif - - kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; - - mutex_init(&kctx->legacy_hwcnt_lock); - - kbase_timer_setup(&kctx->soft_job_timeout, - kbasep_soft_job_timeout_worker); - - return kctx; - -no_jit: - kbase_gpu_vm_lock(kctx); - kbase_sticky_resource_term(kctx); - kbase_gpu_vm_unlock(kctx); -no_sticky: - kbase_region_tracker_term(kctx); -no_region_tracker: - kbase_mem_pool_free(&kctx->mem_pool, p, false); -no_sink_page: - kbase_mmu_term(kbdev, &kctx->mmu); -term_dma_fence: - kbase_dma_fence_term(kctx); -free_kcpu_wq: - kbase_event_cleanup(kctx); -free_jd: - /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ - kbasep_js_kctx_term(kctx); - kbase_jd_exit(kctx); -deinit_evictable: - kbase_mem_evictable_deinit(kctx); -free_both_pools: - kbase_mem_pool_term(&kctx->lp_mem_pool); -free_mem_pool: - kbase_mem_pool_term(&kctx->mem_pool); -free_kctx: - vfree(kctx); -out: - return NULL; -} -KBASE_EXPORT_SYMBOL(kbase_create_context); - -static void kbase_reg_pending_dtor(struct kbase_device *kbdev, - struct kbase_va_region *reg) -{ - dev_dbg(kbdev->dev, "Freeing pending unmapped region\n"); - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); - kfree(reg); -} - -void kbase_destroy_context(struct kbase_context *kctx) -{ - struct kbase_device *kbdev; - int pages; - unsigned long pending_regions_to_clean; - unsigned long flags; - struct page *p; - - KBASE_DEBUG_ASSERT(NULL != kctx); - - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(NULL != kbdev); - - KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u); - - /* Ensure the core is powered up for the destroy process */ - /* A suspend won't happen here, because we're in a syscall from a userspace - * thread. */ - kbase_pm_context_active(kbdev); - - kbase_mem_pool_mark_dying(&kctx->mem_pool); - - kbase_jd_zap_context(kctx); - - /* We have already waited for the jobs to complete (and hereafter there - * can be no more submissions for the context). However the wait could - * have timedout and there could still be work items in flight that - * would do the completion processing of jobs. - * kbase_jd_exit() will destroy the 'job_done_wq'. And destroying the wq - * will cause it do drain and implicitly wait for those work items to - * complete. - */ - kbase_jd_exit(kctx); - -#ifdef CONFIG_DEBUG_FS - /* Removing the rest of the debugfs entries here as we want to keep the - * atom debugfs interface alive until all atoms have completed. This - * is useful for debugging hung contexts. */ - debugfs_remove_recursive(kctx->kctx_dentry); - kbase_debug_job_fault_context_term(kctx); -#endif - - kbase_event_cleanup(kctx); - - - /* - * JIT must be terminated before the code below as it must be called - * without the region lock being held. - * The code above ensures no new JIT allocations can be made by - * by the time we get to this point of context tear down. 
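kbase_create_context() above follows the usual kernel error-handling shape: each successfully initialised resource adds one more label to a reverse-order goto chain, so a failure at any step unwinds exactly what was set up before it and nothing more. A stripped-down illustration of that pattern; the resources and names here are made up, not kbase objects:

#include <stdio.h>
#include <stdlib.h>

struct ctx { void *pool; void *tracker; void *jit; };

static struct ctx *ctx_create(void)
{
        struct ctx *c = calloc(1, sizeof(*c));

        if (!c)
                goto out;
        c->pool = malloc(64);
        if (!c->pool)
                goto free_ctx;
        c->tracker = malloc(64);
        if (!c->tracker)
                goto free_pool;
        c->jit = malloc(64);
        if (!c->jit)
                goto free_tracker;

        return c;               /* success: everything initialised */

free_tracker:
        free(c->tracker);
free_pool:
        free(c->pool);
free_ctx:
        free(c);
out:
        return NULL;            /* failure: nothing is leaked */
}

int main(void)
{
        struct ctx *c = ctx_create();

        printf("context %screated\n", c ? "" : "not ");
        if (c) {
                free(c->jit);
                free(c->tracker);
                free(c->pool);
                free(c);
        }
        return 0;
}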
- */ - kbase_jit_term(kctx); - - kbase_gpu_vm_lock(kctx); - - kbase_sticky_resource_term(kctx); - - /* drop the aliasing sink page now that it can't be mapped anymore */ - p = as_page(kctx->aliasing_sink_page); - kbase_mem_pool_free(&kctx->mem_pool, p, false); - - /* free pending region setups */ - pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; - while (pending_regions_to_clean) { - unsigned int cookie = __ffs(pending_regions_to_clean); - - BUG_ON(!kctx->pending_regions[cookie]); - - kbase_reg_pending_dtor(kbdev, kctx->pending_regions[cookie]); - - kctx->pending_regions[cookie] = NULL; - pending_regions_to_clean &= ~(1UL << cookie); - } - - kbase_region_tracker_term(kctx); - kbase_gpu_vm_unlock(kctx); - - - /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ - kbasep_js_kctx_term(kctx); - - kbase_dma_fence_term(kctx); - - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - kbase_mmu_term(kbdev, &kctx->mmu); - - pages = atomic_read(&kctx->used_pages); - if (pages != 0) - dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); - - kbase_mem_evictable_deinit(kctx); - kbase_mem_pool_term(&kctx->mem_pool); - kbase_mem_pool_term(&kctx->lp_mem_pool); - WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); - - vfree(kctx); - - kbase_pm_context_idle(kbdev); -} -KBASE_EXPORT_SYMBOL(kbase_destroy_context); - -int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) -{ - int err = 0; - struct kbasep_js_kctx_info *js_kctx_info; - unsigned long irq_flags; - - KBASE_DEBUG_ASSERT(NULL != kctx); - - js_kctx_info = &kctx->jctx.sched_info; - - /* Validate flags */ - if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) { - err = -EINVAL; - goto out; - } - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); - - /* Translate the flags */ - if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) - kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); - - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - out: - return err; -} -KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_context.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_context.h deleted file mode 100755 index 30b0f649806b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_context.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_CONTEXT_H_ -#define _KBASE_CONTEXT_H_ - -#include - -/** - * kbase_create_context() - Create a kernel base context. - * @kbdev: Kbase device - * @is_compat: Force creation of a 32-bit context - * - * Allocate and init a kernel base context. - * - * Return: new kbase context - */ -struct kbase_context * -kbase_create_context(struct kbase_device *kbdev, bool is_compat); - -/** - * kbase_destroy_context - Destroy a kernel base context. - * @kctx: Context to destroy - * - * Calls kbase_destroy_os_context() to free OS specific structures. - * Will release all outstanding regions. - */ -void kbase_destroy_context(struct kbase_context *kctx); - -/** - * kbase_context_set_create_flags - Set creation flags on a context - * @kctx: Kbase context - * @flags: Flags to set, which shall be one of the flags of - * BASE_CONTEXT_CREATE_KERNEL_FLAGS. - * - * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified. - */ -int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags); - -/** - * kbase_ctx_flag - Check if @flag is set on @kctx - * @kctx: Pointer to kbase context to check - * @flag: Flag to check - * - * Return: true if @flag is set on @kctx, false if not. - */ -static inline bool kbase_ctx_flag(struct kbase_context *kctx, - enum kbase_context_flags flag) -{ - return atomic_read(&kctx->flags) & flag; -} - -/** - * kbase_ctx_flag_clear - Clear @flag on @kctx - * @kctx: Pointer to kbase context - * @flag: Flag to clear - * - * Clear the @flag on @kctx. This is done atomically, so other flags being - * cleared or set at the same time will be safe. - * - * Some flags have locking requirements, check the documentation for the - * respective flags. - */ -static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, - enum kbase_context_flags flag) -{ -#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE - /* - * Earlier kernel versions doesn't have atomic_andnot() or - * atomic_and(). atomic_clear_mask() was only available on some - * architectures and removed on arm in v3.13 on arm and arm64. - * - * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, - * when atomic_andnot() becomes available. - */ - int old, new; - - do { - old = atomic_read(&kctx->flags); - new = old & ~flag; - - } while (atomic_cmpxchg(&kctx->flags, old, new) != old); -#else - atomic_andnot(flag, &kctx->flags); -#endif -} - -/** - * kbase_ctx_flag_set - Set @flag on @kctx - * @kctx: Pointer to kbase context - * @flag: Flag to clear - * - * Set the @flag on @kctx. This is done atomically, so other flags being - * cleared or set at the same time will be safe. - * - * Some flags have locking requirements, check the documentation for the - * respective flags. - */ -static inline void kbase_ctx_flag_set(struct kbase_context *kctx, - enum kbase_context_flags flag) -{ - atomic_or(flag, &kctx->flags); -} -#endif /* _KBASE_CONTEXT_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_core_linux.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_core_linux.c deleted file mode 100755 index 382285f060b5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_core_linux.c +++ /dev/null @@ -1,4333 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include -#include -#ifdef CONFIG_MALI_DEVFREQ -#include -#include -#ifdef CONFIG_DEVFREQ_THERMAL -#include -#endif /* CONFIG_DEVFREQ_THERMAL */ -#endif /* CONFIG_MALI_DEVFREQ */ -#ifdef CONFIG_MALI_NO_MALI -#include "mali_kbase_model_linux.h" -#include -#endif /* CONFIG_MALI_NO_MALI */ -#include "mali_kbase_mem_profile_debugfs_buf_size.h" -#include "mali_kbase_debug_mem_view.h" -#include "mali_kbase_mem.h" -#include "mali_kbase_mem_pool_debugfs.h" -#if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_regs_dump_debugfs.h" -#endif /* !MALI_CUSTOMER_RELEASE */ -#include "mali_kbase_regs_history_debugfs.h" -#include -#include -#include -#include -#include -#include "mali_kbase_ioctl.h" -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_legacy.h" -#include "mali_kbase_vinstr.h" - -#ifdef CONFIG_MALI_CINSTR_GWT -#include "mali_kbase_gwt.h" -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* is_compat_task/in_compat_syscall */ -#include -#include -#include -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -#include -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -#include -#include -#include - -#include - - -#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE) -#include -#else -#include -#endif - -#include - -#include - -/* GPU IRQ Tags */ -#define JOB_IRQ_TAG 0 -#define MMU_IRQ_TAG 1 -#define GPU_IRQ_TAG 2 - -static int kbase_dev_nr; - -static DEFINE_MUTEX(kbase_dev_list_lock); -static LIST_HEAD(kbase_dev_list); - -#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" - -static int kbase_api_handshake(struct kbase_context *kctx, - struct kbase_ioctl_version_check *version) -{ - switch (version->major) { - case BASE_UK_VERSION_MAJOR: - /* set minor to be the lowest common */ - version->minor = min_t(int, BASE_UK_VERSION_MINOR, - (int)version->minor); - break; - default: - /* We return our actual version regardless if it - * matches the version returned by userspace - - * userspace can bail if it can't handle this - * version - */ - version->major = BASE_UK_VERSION_MAJOR; - version->minor = BASE_UK_VERSION_MINOR; - break; - } - - /* save the proposed version number for later use */ - kctx->api_version = KBASE_API_VERSION(version->major, version->minor); - - return 0; -} - -/** - * enum mali_error - Mali error codes shared with userspace - * - * This is subset of those common Mali errors that can be returned to userspace. - * Values of matching user and kernel space enumerators MUST be the same. - * MALI_ERROR_NONE is guaranteed to be 0. 
- * - * @MALI_ERROR_NONE: Success - * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver - * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure - * @MALI_ERROR_FUNCTION_FAILED: Generic error code - */ -enum mali_error { - MALI_ERROR_NONE = 0, - MALI_ERROR_OUT_OF_GPU_MEMORY, - MALI_ERROR_OUT_OF_MEMORY, - MALI_ERROR_FUNCTION_FAILED, -}; - -enum { - inited_mem = (1u << 0), - inited_js = (1u << 1), - /* Bit number 2 was earlier assigned to the runtime-pm initialization - * stage (which has been merged with the backend_early stage). - */ -#ifdef CONFIG_MALI_DEVFREQ - inited_devfreq = (1u << 3), -#endif /* CONFIG_MALI_DEVFREQ */ - inited_tlstream = (1u << 4), - inited_backend_early = (1u << 5), - inited_hwcnt_gpu_iface = (1u << 6), - inited_hwcnt_gpu_ctx = (1u << 7), - inited_hwcnt_gpu_virt = (1u << 8), - inited_vinstr = (1u << 9), - inited_backend_late = (1u << 10), - inited_device = (1u << 11), - inited_job_fault = (1u << 13), - inited_sysfs_group = (1u << 14), - inited_misc_register = (1u << 15), - inited_get_device = (1u << 16), - inited_dev_list = (1u << 17), - inited_debugfs = (1u << 18), - inited_gpu_device = (1u << 19), - inited_registers_map = (1u << 20), - inited_io_history = (1u << 21), - inited_power_control = (1u << 22), - inited_buslogger = (1u << 23), - inited_protected = (1u << 24), - inited_ctx_sched = (1u << 25) -}; - -static struct kbase_device *to_kbase_device(struct device *dev) -{ - return dev_get_drvdata(dev); -} - -static int assign_irqs(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - int i; - - if (!kbdev) - return -ENODEV; - - /* 3 IRQ resources */ - for (i = 0; i < 3; i++) { - struct resource *irq_res; - int irqtag; - - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq_res) { - dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); - return -ENOENT; - } - -#ifdef CONFIG_OF - if (!strncmp(irq_res->name, "JOB", 4)) { - irqtag = JOB_IRQ_TAG; - } else if (!strncmp(irq_res->name, "MMU", 4)) { - irqtag = MMU_IRQ_TAG; - } else if (!strncmp(irq_res->name, "GPU", 4)) { - irqtag = GPU_IRQ_TAG; - } else { - dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", - irq_res->name); - return -EINVAL; - } -#else - irqtag = i; -#endif /* CONFIG_OF */ - kbdev->irqs[irqtag].irq = irq_res->start; - kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; - } - - return 0; -} - -/* - * API to acquire device list mutex and - * return pointer to the device list head - */ -const struct list_head *kbase_dev_list_get(void) -{ - mutex_lock(&kbase_dev_list_lock); - return &kbase_dev_list; -} -KBASE_EXPORT_TEST_API(kbase_dev_list_get); - -/* API to release the device list mutex */ -void kbase_dev_list_put(const struct list_head *dev_list) -{ - mutex_unlock(&kbase_dev_list_lock); -} -KBASE_EXPORT_TEST_API(kbase_dev_list_put); - -/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ -struct kbase_device *kbase_find_device(int minor) -{ - struct kbase_device *kbdev = NULL; - struct list_head *entry; - const struct list_head *dev_list = kbase_dev_list_get(); - - list_for_each(entry, dev_list) { - struct kbase_device *tmp; - - tmp = list_entry(entry, struct kbase_device, entry); - if (tmp->mdev.minor == minor || minor == -1) { - kbdev = tmp; - get_device(kbdev->dev); - break; - } - } - kbase_dev_list_put(dev_list); - - return kbdev; -} -EXPORT_SYMBOL(kbase_find_device); - -void kbase_release_device(struct kbase_device *kbdev) -{ - put_device(kbdev->dev); -} 
-EXPORT_SYMBOL(kbase_release_device); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \ - !(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)) -/* - * Older versions, before v4.6, of the kernel doesn't have - * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 - */ -static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) -{ - char buf[4]; - - count = min(count, sizeof(buf) - 1); - - if (copy_from_user(buf, s, count)) - return -EFAULT; - buf[count] = '\0'; - - return strtobool(buf, res); -} -#endif - -static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) -{ - struct kbase_context *kctx = f->private_data; - int err; - bool value; - - err = kstrtobool_from_user(ubuf, size, &value); - if (err) - return err; - - if (value) - kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); - else - kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); - - return size; -} - -static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) -{ - struct kbase_context *kctx = f->private_data; - char buf[32]; - int count; - bool value; - - value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); - - count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N"); - - return simple_read_from_buffer(ubuf, size, off, buf, count); -} - -static const struct file_operations kbase_infinite_cache_fops = { - .open = simple_open, - .write = write_ctx_infinite_cache, - .read = read_ctx_infinite_cache, -}; - -static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, - size_t size, loff_t *off) -{ - struct kbase_context *kctx = f->private_data; - int err; - bool value; - - err = kstrtobool_from_user(ubuf, size, &value); - if (err) - return err; - - if (value) { -#if defined(CONFIG_64BIT) - /* 32-bit clients cannot force SAME_VA */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - return -EINVAL; - kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -#else /* defined(CONFIG_64BIT) */ - /* 32-bit clients cannot force SAME_VA */ - return -EINVAL; -#endif /* defined(CONFIG_64BIT) */ - } else { - kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA); - } - - return size; -} - -static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, - size_t size, loff_t *off) -{ - struct kbase_context *kctx = f->private_data; - char buf[32]; - int count; - bool value; - - value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA); - - count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); - - return simple_read_from_buffer(ubuf, size, off, buf, count); -} - -static const struct file_operations kbase_force_same_va_fops = { - .open = simple_open, - .write = write_ctx_force_same_va, - .read = read_ctx_force_same_va, -}; - -static int kbase_open(struct inode *inode, struct file *filp) -{ - struct kbase_device *kbdev = NULL; - struct kbase_context *kctx; - int ret = 0; -#ifdef CONFIG_DEBUG_FS - char kctx_name[64]; -#endif - - kbdev = kbase_find_device(iminor(inode)); - - if (!kbdev) - return -ENODEV; - -#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) - kctx = kbase_create_context(kbdev, in_compat_syscall()); -#else - kctx = kbase_create_context(kbdev, is_compat_task()); -#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */ - if (!kctx) { - ret = -ENOMEM; - goto out; - } - - init_waitqueue_head(&kctx->event_queue); - filp->private_data = kctx; - filp->f_mode |= FMODE_UNSIGNED_OFFSET; - kctx->filp = filp; - - if (kbdev->infinite_cache_active_default) - kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); - -#ifdef CONFIG_DEBUG_FS - snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); - - kctx->kctx_dentry = debugfs_create_dir(kctx_name, - kbdev->debugfs_ctx_directory); - - if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { - ret = -ENOMEM; - goto out; - } - - debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, - kctx, &kbase_infinite_cache_fops); - debugfs_create_file("force_same_va", S_IRUSR | S_IWUSR, - kctx->kctx_dentry, kctx, &kbase_force_same_va_fops); - - mutex_init(&kctx->mem_profile_lock); - - kbasep_jd_debugfs_ctx_init(kctx); - kbase_debug_mem_view_init(filp); - - kbase_debug_job_fault_context_init(kctx); - - kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool); - - kbase_jit_debugfs_init(kctx); -#endif /* CONFIG_DEBUG_FS */ - - dev_dbg(kbdev->dev, "created base context\n"); - - { - struct kbasep_kctx_list_element *element; - - element = kzalloc(sizeof(*element), GFP_KERNEL); - if (element) { - mutex_lock(&kbdev->kctx_list_lock); - element->kctx = kctx; - list_add(&element->link, &kbdev->kctx_list); - KBASE_TLSTREAM_TL_NEW_CTX( - element->kctx, - element->kctx->id, - (u32)(element->kctx->tgid)); - mutex_unlock(&kbdev->kctx_list_lock); - } else { - /* we don't treat this as a fail - just warn about it */ - dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n"); - } - } - return 0; - - out: - kbase_release_device(kbdev); - return ret; -} - -static int kbase_release(struct inode *inode, struct file *filp) -{ - struct kbase_context *kctx = filp->private_data; - struct kbase_device *kbdev = kctx->kbdev; - struct kbasep_kctx_list_element *element, *tmp; - bool found_element = false; - - KBASE_TLSTREAM_TL_DEL_CTX(kctx); - -#ifdef CONFIG_DEBUG_FS - kbasep_mem_profile_debugfs_remove(kctx); -#endif - - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { - if (element->kctx == kctx) { - list_del(&element->link); - kfree(element); - found_element = true; - } - } - mutex_unlock(&kbdev->kctx_list_lock); - if (!found_element) - dev_warn(kbdev->dev, "kctx not in kctx_list\n"); - - filp->private_data = NULL; - - mutex_lock(&kctx->legacy_hwcnt_lock); - /* If this client was performing hwcnt dumping and did not explicitly - * detach itself, destroy it now - */ - kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli); - kctx->legacy_hwcnt_cli = NULL; - mutex_unlock(&kctx->legacy_hwcnt_lock); - - kbase_destroy_context(kctx); - - dev_dbg(kbdev->dev, "deleted base context\n"); - 
kbase_release_device(kbdev); - return 0; -} - -static int kbase_api_set_flags(struct kbase_context *kctx, - struct kbase_ioctl_set_flags *flags) -{ - int err; - - /* setup pending, try to signal that we'll do the setup, - * if setup was already in progress, err this call - */ - if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0) - return -EINVAL; - - err = kbase_context_set_create_flags(kctx, flags->create_flags); - /* if bad flags, will stay stuck in setup mode */ - if (err) - return err; - - atomic_set(&kctx->setup_complete, 1); - return 0; -} - -static int kbase_api_job_submit(struct kbase_context *kctx, - struct kbase_ioctl_job_submit *submit) -{ - return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), - submit->nr_atoms, - submit->stride, false); -} - -static int kbase_api_get_gpuprops(struct kbase_context *kctx, - struct kbase_ioctl_get_gpuprops *get_props) -{ - struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props; - int err; - - if (get_props->flags != 0) { - dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops"); - return -EINVAL; - } - - if (get_props->size == 0) - return kprops->prop_buffer_size; - if (get_props->size < kprops->prop_buffer_size) - return -EINVAL; - - err = copy_to_user(u64_to_user_ptr(get_props->buffer), - kprops->prop_buffer, - kprops->prop_buffer_size); - if (err) - return -EFAULT; - return kprops->prop_buffer_size; -} - -static int kbase_api_post_term(struct kbase_context *kctx) -{ - kbase_event_close(kctx); - return 0; -} - -static int kbase_api_mem_alloc(struct kbase_context *kctx, - union kbase_ioctl_mem_alloc *alloc) -{ - struct kbase_va_region *reg; - u64 flags = alloc->in.flags; - u64 gpu_va; - - rcu_read_lock(); - /* Don't allow memory allocation until user space has set up the - * tracking page (which sets kctx->process_mm). Also catches when we've - * forked. - */ - if (rcu_dereference(kctx->process_mm) != current->mm) { - rcu_read_unlock(); - return -EINVAL; - } - rcu_read_unlock(); - - if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) - return -ENOMEM; - - /* Force SAME_VA if a 64-bit client. - * The only exception is GPU-executable memory if an EXEC_VA zone - * has been initialized. In that case, GPU-executable memory may - * or may not be SAME_VA. 
- */ - if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && - kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { - if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx)) - flags |= BASE_MEM_SAME_VA; - } - - - reg = kbase_mem_alloc(kctx, alloc->in.va_pages, - alloc->in.commit_pages, - alloc->in.extent, - &flags, &gpu_va); - - if (!reg) - return -ENOMEM; - - alloc->out.flags = flags; - alloc->out.gpu_va = gpu_va; - - return 0; -} - -static int kbase_api_mem_query(struct kbase_context *kctx, - union kbase_ioctl_mem_query *query) -{ - return kbase_mem_query(kctx, query->in.gpu_addr, - query->in.query, &query->out.value); -} - -static int kbase_api_mem_free(struct kbase_context *kctx, - struct kbase_ioctl_mem_free *free) -{ - return kbase_mem_free(kctx, free->gpu_addr); -} - -static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_reader_setup *setup) -{ - return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); -} - -static int kbase_api_hwcnt_enable(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_enable *enable) -{ - int ret; - - mutex_lock(&kctx->legacy_hwcnt_lock); - if (enable->dump_buffer != 0) { - /* Non-zero dump buffer, so user wants to create the client */ - if (kctx->legacy_hwcnt_cli == NULL) { - ret = kbase_hwcnt_legacy_client_create( - kctx->kbdev->hwcnt_gpu_virt, - enable, - &kctx->legacy_hwcnt_cli); - } else { - /* This context already has a client */ - ret = -EBUSY; - } - } else { - /* Zero dump buffer, so user wants to destroy the client */ - if (kctx->legacy_hwcnt_cli != NULL) { - kbase_hwcnt_legacy_client_destroy( - kctx->legacy_hwcnt_cli); - kctx->legacy_hwcnt_cli = NULL; - ret = 0; - } else { - /* This context has no client to destroy */ - ret = -EINVAL; - } - } - mutex_unlock(&kctx->legacy_hwcnt_lock); - - return ret; -} - -static int kbase_api_hwcnt_dump(struct kbase_context *kctx) -{ - int ret; - - mutex_lock(&kctx->legacy_hwcnt_lock); - ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli); - mutex_unlock(&kctx->legacy_hwcnt_lock); - - return ret; -} - -static int kbase_api_hwcnt_clear(struct kbase_context *kctx) -{ - int ret; - - mutex_lock(&kctx->legacy_hwcnt_lock); - ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli); - mutex_unlock(&kctx->legacy_hwcnt_lock); - - return ret; -} - -#ifdef CONFIG_MALI_NO_MALI -static int kbase_api_hwcnt_set(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_values *values) -{ - gpu_model_set_dummy_prfcnt_sample( - (u32 __user *)(uintptr_t)values->data, - values->size); - - return 0; -} -#endif - -static int kbase_api_disjoint_query(struct kbase_context *kctx, - struct kbase_ioctl_disjoint_query *query) -{ - query->counter = kbase_disjoint_event_get(kctx->kbdev); - - return 0; -} - -static int kbase_api_get_ddk_version(struct kbase_context *kctx, - struct kbase_ioctl_get_ddk_version *version) -{ - int ret; - int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); - - if (version->version_buffer == 0) - return len; - - if (version->size < len) - return -EOVERFLOW; - - ret = copy_to_user(u64_to_user_ptr(version->version_buffer), - KERNEL_SIDE_DDK_VERSION_STRING, - sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); - - if (ret) - return -EFAULT; - - return len; -} - -/* Defaults for legacy JIT init ioctl */ -#define DEFAULT_MAX_JIT_ALLOCATIONS 255 -#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */ - -static int kbase_api_mem_jit_init_old(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init_old *jit_init) -{ - kctx->jit_version = 1; - - return 
kbase_region_tracker_init_jit(kctx, jit_init->va_pages, - DEFAULT_MAX_JIT_ALLOCATIONS, - JIT_LEGACY_TRIM_LEVEL); -} - -static int kbase_api_mem_jit_init(struct kbase_context *kctx, - struct kbase_ioctl_mem_jit_init *jit_init) -{ - int i; - - kctx->jit_version = 2; - - for (i = 0; i < sizeof(jit_init->padding); i++) { - /* Ensure all padding bytes are 0 for potential future - * extension - */ - if (jit_init->padding[i]) - return -EINVAL; - } - - return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, - jit_init->max_allocations, jit_init->trim_level); -} - -static int kbase_api_mem_exec_init(struct kbase_context *kctx, - struct kbase_ioctl_mem_exec_init *exec_init) -{ - return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); -} - -static int kbase_api_mem_sync(struct kbase_context *kctx, - struct kbase_ioctl_mem_sync *sync) -{ - struct basep_syncset sset = { - .mem_handle.basep.handle = sync->handle, - .user_addr = sync->user_addr, - .size = sync->size, - .type = sync->type - }; - - return kbase_sync_now(kctx, &sset); -} - -static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx, - union kbase_ioctl_mem_find_cpu_offset *find) -{ - return kbasep_find_enclosing_cpu_mapping_offset( - kctx, - find->in.cpu_addr, - find->in.size, - &find->out.offset); -} - -static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx, - union kbase_ioctl_mem_find_gpu_start_and_offset *find) -{ - return kbasep_find_enclosing_gpu_mapping_start_and_offset( - kctx, - find->in.gpu_addr, - find->in.size, - &find->out.start, - &find->out.offset); -} - -static int kbase_api_get_context_id(struct kbase_context *kctx, - struct kbase_ioctl_get_context_id *info) -{ - info->id = kctx->id; - - return 0; -} - -static int kbase_api_tlstream_acquire(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_acquire *acquire) -{ - return kbase_tlstream_acquire(kctx, acquire->flags); -} - -static int kbase_api_tlstream_flush(struct kbase_context *kctx) -{ - kbase_tlstream_flush_streams(); - - return 0; -} - -static int kbase_api_mem_commit(struct kbase_context *kctx, - struct kbase_ioctl_mem_commit *commit) -{ - return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages); -} - -static int kbase_api_mem_alias(struct kbase_context *kctx, - union kbase_ioctl_mem_alias *alias) -{ - struct base_mem_aliasing_info *ai; - u64 flags; - int err; - - if (alias->in.nents == 0 || alias->in.nents > 2048) - return -EINVAL; - - if (alias->in.stride > (U64_MAX / 2048)) - return -EINVAL; - - ai = vmalloc(sizeof(*ai) * alias->in.nents); - if (!ai) - return -ENOMEM; - - err = copy_from_user(ai, - u64_to_user_ptr(alias->in.aliasing_info), - sizeof(*ai) * alias->in.nents); - if (err) { - vfree(ai); - return -EFAULT; - } - - flags = alias->in.flags; - if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) { - vfree(ai); - return -EINVAL; - } - - alias->out.gpu_va = kbase_mem_alias(kctx, &flags, - alias->in.stride, alias->in.nents, - ai, &alias->out.va_pages); - - alias->out.flags = flags; - - vfree(ai); - - if (alias->out.gpu_va == 0) - return -ENOMEM; - - return 0; -} - -static int kbase_api_mem_import(struct kbase_context *kctx, - union kbase_ioctl_mem_import *import) -{ - int ret; - u64 flags = import->in.flags; - - if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) - return -ENOMEM; - - ret = kbase_mem_import(kctx, - import->in.type, - u64_to_user_ptr(import->in.phandle), - import->in.padding, - &import->out.gpu_va, - &import->out.va_pages, - &flags); - - import->out.flags = flags; - - return ret; -} - -static int 
kbase_api_mem_flags_change(struct kbase_context *kctx, - struct kbase_ioctl_mem_flags_change *change) -{ - if (change->flags & BASE_MEM_FLAGS_KERNEL_ONLY) - return -ENOMEM; - - return kbase_mem_flags_change(kctx, change->gpu_va, - change->flags, change->mask); -} - -static int kbase_api_stream_create(struct kbase_context *kctx, - struct kbase_ioctl_stream_create *stream) -{ -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - int fd, ret; - - /* Name must be NULL-terminated and padded with NULLs, so check last - * character is NULL - */ - if (stream->name[sizeof(stream->name)-1] != 0) - return -EINVAL; - - ret = kbase_sync_fence_stream_create(stream->name, &fd); - - if (ret) - return ret; - return fd; -#else - return -ENOENT; -#endif -} - -static int kbase_api_fence_validate(struct kbase_context *kctx, - struct kbase_ioctl_fence_validate *validate) -{ -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - return kbase_sync_fence_validate(validate->fd); -#else - return -ENOENT; -#endif -} - -static int kbase_api_mem_profile_add(struct kbase_context *kctx, - struct kbase_ioctl_mem_profile_add *data) -{ - char *buf; - int err; - - if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { - dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n"); - return -EINVAL; - } - - buf = kmalloc(data->len, GFP_KERNEL); - if (ZERO_OR_NULL_PTR(buf)) - return -ENOMEM; - - err = copy_from_user(buf, u64_to_user_ptr(data->buffer), - data->len); - if (err) { - kfree(buf); - return -EFAULT; - } - - return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); -} - -static int kbase_api_soft_event_update(struct kbase_context *kctx, - struct kbase_ioctl_soft_event_update *update) -{ - if (update->flags != 0) - return -EINVAL; - - return kbase_soft_event_update(kctx, update->event, update->new_status); -} - -static int kbase_api_sticky_resource_map(struct kbase_context *kctx, - struct kbase_ioctl_sticky_resource_map *map) -{ - int ret; - u64 i; - u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; - - if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX) - return -EOVERFLOW; - - ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), - sizeof(u64) * map->count); - - if (ret != 0) - return -EFAULT; - - kbase_gpu_vm_lock(kctx); - - for (i = 0; i < map->count; i++) { - if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { - /* Invalid resource */ - ret = -EINVAL; - break; - } - } - - if (ret != 0) { - while (i > 0) { - i--; - kbase_sticky_resource_release(kctx, NULL, gpu_addr[i]); - } - } - - kbase_gpu_vm_unlock(kctx); - - return ret; -} - -static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, - struct kbase_ioctl_sticky_resource_unmap *unmap) -{ - int ret; - u64 i; - u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; - - if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX) - return -EOVERFLOW; - - ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), - sizeof(u64) * unmap->count); - - if (ret != 0) - return -EFAULT; - - kbase_gpu_vm_lock(kctx); - - for (i = 0; i < unmap->count; i++) { - if (!kbase_sticky_resource_release(kctx, NULL, gpu_addr[i])) { - /* Invalid resource, but we keep going anyway */ - ret = -EINVAL; - } - } - - kbase_gpu_vm_unlock(kctx); - - return ret; -} - -#if MALI_UNIT_TEST -static int kbase_api_tlstream_test(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_test *test) -{ - kbase_tlstream_test( - test->tpw_count, - test->msg_delay, - test->msg_count, - test->aux_msg); - - return 0; -} - -static int kbase_api_tlstream_stats(struct kbase_context *kctx, - struct 
kbase_ioctl_tlstream_stats *stats) -{ - kbase_tlstream_stats( - &stats->bytes_collected, - &stats->bytes_generated); - - return 0; -} -#endif /* MALI_UNIT_TEST */ - - -#define KBASE_HANDLE_IOCTL(cmd, function) \ - do { \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ - return function(kctx); \ - } while (0) - -#define KBASE_HANDLE_IOCTL_IN(cmd, function, type) \ - do { \ - type param; \ - int err; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(¶m, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - return function(kctx, ¶m); \ - } while (0) - -#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type) \ - do { \ - type param; \ - int ret, err; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - ret = function(kctx, ¶m); \ - err = copy_to_user(uarg, ¶m, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - return ret; \ - } while (0) - -#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type) \ - do { \ - type param; \ - int ret, err; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(¶m, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - ret = function(kctx, ¶m); \ - err = copy_to_user(uarg, ¶m, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - return ret; \ - } while (0) - -static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct kbase_context *kctx = filp->private_data; - struct kbase_device *kbdev = kctx->kbdev; - void __user *uarg = (void __user *)arg; - - /* Only these ioctls are available until setup is complete */ - switch (cmd) { - case KBASE_IOCTL_VERSION_CHECK: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, - kbase_api_handshake, - struct kbase_ioctl_version_check); - break; - - case KBASE_IOCTL_SET_FLAGS: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, - kbase_api_set_flags, - struct kbase_ioctl_set_flags); - break; - } - - /* Block call until version handshake and setup is complete */ - if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete)) - return -EINVAL; - - /* Normal ioctls */ - switch (cmd) { - case KBASE_IOCTL_JOB_SUBMIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, - kbase_api_job_submit, - struct kbase_ioctl_job_submit); - break; - case KBASE_IOCTL_GET_GPUPROPS: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, - kbase_api_get_gpuprops, - struct kbase_ioctl_get_gpuprops); - break; - case KBASE_IOCTL_POST_TERM: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, - kbase_api_post_term); - break; - case KBASE_IOCTL_MEM_ALLOC: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, - kbase_api_mem_alloc, - union kbase_ioctl_mem_alloc); - break; - case KBASE_IOCTL_MEM_QUERY: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, - kbase_api_mem_query, - union kbase_ioctl_mem_query); - break; - case KBASE_IOCTL_MEM_FREE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, - kbase_api_mem_free, - struct kbase_ioctl_mem_free); - break; - case KBASE_IOCTL_DISJOINT_QUERY: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, - kbase_api_disjoint_query, - struct kbase_ioctl_disjoint_query); - break; - case KBASE_IOCTL_GET_DDK_VERSION: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, - kbase_api_get_ddk_version, - struct kbase_ioctl_get_ddk_version); - break; - case KBASE_IOCTL_MEM_JIT_INIT_OLD: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD, - kbase_api_mem_jit_init_old, - struct kbase_ioctl_mem_jit_init_old); - break; - case 
KBASE_IOCTL_MEM_JIT_INIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, - kbase_api_mem_jit_init, - struct kbase_ioctl_mem_jit_init); - break; - case KBASE_IOCTL_MEM_EXEC_INIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, - kbase_api_mem_exec_init, - struct kbase_ioctl_mem_exec_init); - break; - case KBASE_IOCTL_MEM_SYNC: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, - kbase_api_mem_sync, - struct kbase_ioctl_mem_sync); - break; - case KBASE_IOCTL_MEM_FIND_CPU_OFFSET: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, - kbase_api_mem_find_cpu_offset, - union kbase_ioctl_mem_find_cpu_offset); - break; - case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET, - kbase_api_mem_find_gpu_start_and_offset, - union kbase_ioctl_mem_find_gpu_start_and_offset); - break; - case KBASE_IOCTL_GET_CONTEXT_ID: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, - kbase_api_get_context_id, - struct kbase_ioctl_get_context_id); - break; - case KBASE_IOCTL_TLSTREAM_ACQUIRE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, - kbase_api_tlstream_acquire, - struct kbase_ioctl_tlstream_acquire); - break; - case KBASE_IOCTL_TLSTREAM_FLUSH: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, - kbase_api_tlstream_flush); - break; - case KBASE_IOCTL_MEM_COMMIT: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, - kbase_api_mem_commit, - struct kbase_ioctl_mem_commit); - break; - case KBASE_IOCTL_MEM_ALIAS: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, - kbase_api_mem_alias, - union kbase_ioctl_mem_alias); - break; - case KBASE_IOCTL_MEM_IMPORT: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, - kbase_api_mem_import, - union kbase_ioctl_mem_import); - break; - case KBASE_IOCTL_MEM_FLAGS_CHANGE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, - kbase_api_mem_flags_change, - struct kbase_ioctl_mem_flags_change); - break; - case KBASE_IOCTL_STREAM_CREATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, - kbase_api_stream_create, - struct kbase_ioctl_stream_create); - break; - case KBASE_IOCTL_FENCE_VALIDATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, - kbase_api_fence_validate, - struct kbase_ioctl_fence_validate); - break; - case KBASE_IOCTL_MEM_PROFILE_ADD: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, - kbase_api_mem_profile_add, - struct kbase_ioctl_mem_profile_add); - break; - case KBASE_IOCTL_SOFT_EVENT_UPDATE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, - kbase_api_soft_event_update, - struct kbase_ioctl_soft_event_update); - break; - case KBASE_IOCTL_STICKY_RESOURCE_MAP: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, - kbase_api_sticky_resource_map, - struct kbase_ioctl_sticky_resource_map); - break; - case KBASE_IOCTL_STICKY_RESOURCE_UNMAP: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP, - kbase_api_sticky_resource_unmap, - struct kbase_ioctl_sticky_resource_unmap); - break; - - /* Instrumentation. 
*/ - case KBASE_IOCTL_HWCNT_READER_SETUP: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, - kbase_api_hwcnt_reader_setup, - struct kbase_ioctl_hwcnt_reader_setup); - break; - case KBASE_IOCTL_HWCNT_ENABLE: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, - kbase_api_hwcnt_enable, - struct kbase_ioctl_hwcnt_enable); - break; - case KBASE_IOCTL_HWCNT_DUMP: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, - kbase_api_hwcnt_dump); - break; - case KBASE_IOCTL_HWCNT_CLEAR: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, - kbase_api_hwcnt_clear); - break; -#ifdef CONFIG_MALI_NO_MALI - case KBASE_IOCTL_HWCNT_SET: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, - kbase_api_hwcnt_set, - struct kbase_ioctl_hwcnt_values); - break; -#endif -#ifdef CONFIG_MALI_CINSTR_GWT - case KBASE_IOCTL_CINSTR_GWT_START: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, - kbase_gpu_gwt_start); - break; - case KBASE_IOCTL_CINSTR_GWT_STOP: - KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, - kbase_gpu_gwt_stop); - break; - case KBASE_IOCTL_CINSTR_GWT_DUMP: - KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, - kbase_gpu_gwt_dump, - union kbase_ioctl_cinstr_gwt_dump); - break; -#endif -#if MALI_UNIT_TEST - case KBASE_IOCTL_TLSTREAM_TEST: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, - kbase_api_tlstream_test, - struct kbase_ioctl_tlstream_test); - break; - case KBASE_IOCTL_TLSTREAM_STATS: - KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, - kbase_api_tlstream_stats, - struct kbase_ioctl_tlstream_stats); - break; -#endif - } - - dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); - - return -ENOIOCTLCMD; -} - -static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) -{ - struct kbase_context *kctx = filp->private_data; - struct base_jd_event_v2 uevent; - int out_count = 0; - - if (count < sizeof(uevent)) - return -ENOBUFS; - - do { - while (kbase_event_dequeue(kctx, &uevent)) { - if (out_count > 0) - goto out; - - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(kctx->event_queue, - kbase_event_pending(kctx)) != 0) - return -ERESTARTSYS; - } - if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { - if (out_count == 0) - return -EPIPE; - goto out; - } - - if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) - return -EFAULT; - - buf += sizeof(uevent); - out_count++; - count -= sizeof(uevent); - } while (count >= sizeof(uevent)); - - out: - return out_count * sizeof(uevent); -} - -static unsigned int kbase_poll(struct file *filp, poll_table *wait) -{ - struct kbase_context *kctx = filp->private_data; - - poll_wait(filp, &kctx->event_queue, wait); - if (kbase_event_pending(kctx)) - return POLLIN | POLLRDNORM; - - return 0; -} - -void kbase_event_wakeup(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(kctx); - - wake_up_interruptible(&kctx->event_queue); -} - -KBASE_EXPORT_TEST_API(kbase_event_wakeup); - -static int kbase_check_flags(int flags) -{ - /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always - * closes the file descriptor in a child process. 
- */ - if (0 == (flags & O_CLOEXEC)) - return -EINVAL; - - return 0; -} - -static const struct file_operations kbase_fops = { - .owner = THIS_MODULE, - .open = kbase_open, - .release = kbase_release, - .read = kbase_read, - .poll = kbase_poll, - .unlocked_ioctl = kbase_ioctl, - .compat_ioctl = kbase_ioctl, - .mmap = kbase_mmap, - .check_flags = kbase_check_flags, - .get_unmapped_area = kbase_get_unmapped_area, -}; - -/** - * show_policy - Show callback for the power_policy sysfs file. - * - * This function is called to get the contents of the power_policy sysfs - * file. This is a list of the available policies with the currently active one - * surrounded by square brackets. - * - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) -{ - struct kbase_device *kbdev; - const struct kbase_pm_policy *current_policy; - const struct kbase_pm_policy *const *policy_list; - int policy_count; - int i; - ssize_t ret = 0; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - current_policy = kbase_pm_get_policy(kbdev); - - policy_count = kbase_pm_list_policies(&policy_list); - - for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { - if (policy_list[i] == current_policy) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); - else - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); - } - - if (ret < PAGE_SIZE - 1) { - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); - } else { - buf[PAGE_SIZE - 2] = '\n'; - buf[PAGE_SIZE - 1] = '\0'; - ret = PAGE_SIZE - 1; - } - - return ret; -} - -/** - * set_policy - Store callback for the power_policy sysfs file. - * - * This function is called when the power_policy sysfs file is written to. - * It matches the requested policy against the available policies and if a - * matching policy is found calls kbase_pm_set_policy() to change the - * policy. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - const struct kbase_pm_policy *new_policy = NULL; - const struct kbase_pm_policy *const *policy_list; - int policy_count; - int i; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - policy_count = kbase_pm_list_policies(&policy_list); - - for (i = 0; i < policy_count; i++) { - if (sysfs_streq(policy_list[i]->name, buf)) { - new_policy = policy_list[i]; - break; - } - } - - if (!new_policy) { - dev_err(dev, "power_policy: policy not found\n"); - return -EINVAL; - } - - kbase_pm_set_policy(kbdev, new_policy); - - return count; -} - -/* - * The sysfs file power_policy. - * - * This is used for obtaining information about the available policies, - * determining which policy is currently active, and changing the active - * policy. - */ -static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); - -/* - * show_core_mask - Show callback for the core_mask sysfs file. - * - * This function is called to get the contents of the core_mask sysfs file. 
- * - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret = 0; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current core mask (JS0) : 0x%llX\n", - kbdev->pm.debug_core_mask[0]); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current core mask (JS1) : 0x%llX\n", - kbdev->pm.debug_core_mask[1]); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Current core mask (JS2) : 0x%llX\n", - kbdev->pm.debug_core_mask[2]); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "Available core mask : 0x%llX\n", - kbdev->gpu_props.props.raw_props.shader_present); - - return ret; -} - -/** - * set_core_mask - Store callback for the core_mask sysfs file. - * - * This function is called when the core_mask sysfs file is written to. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - u64 new_core_mask[3]; - int items, i; - ssize_t err = count; - unsigned long flags; - u64 shader_present, group0_core_mask; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - items = sscanf(buf, "%llx %llx %llx", - &new_core_mask[0], &new_core_mask[1], - &new_core_mask[2]); - - if (items != 1 && items != 3) { - dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" - "Use format \n" - "or \n"); - err = -EINVAL; - goto end; - } - - if (items == 1) - new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - shader_present = kbdev->gpu_props.props.raw_props.shader_present; - group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; - - for (i = 0; i < 3; ++i) { - if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", - new_core_mask[i], i, shader_present); - err = -EINVAL; - goto unlock; - - } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", - new_core_mask[i], i, - kbdev->gpu_props.props.raw_props.shader_present, - kbdev->pm.backend.ca_cores_enabled); - err = -EINVAL; - goto unlock; - - } else if (!(new_core_mask[i] & group0_core_mask)) { - dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", - new_core_mask[i], i, group0_core_mask); - err = -EINVAL; - goto unlock; - } - } - - if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || - kbdev->pm.debug_core_mask[1] != - new_core_mask[1] || - kbdev->pm.debug_core_mask[2] != - new_core_mask[2]) { - - kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], - new_core_mask[1], new_core_mask[2]); - } - -unlock: - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -end: - return err; -} - -/* - * The sysfs file core_mask. 
- * - * This is used to restrict shader core availability for debugging purposes. - * Reading it will show the current core mask and the mask of cores available. - * Writing to it will set the current core mask. - */ -static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); - -/** - * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs - * file. - * - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. - * - * This allows setting the timeout for software jobs. Waiting soft event wait - * jobs will be cancelled after this period expires, while soft fence wait jobs - * will print debug information if the fence debug feature is enabled. - * - * This is expressed in milliseconds. - * - * Return: count if the function succeeded. An error code on failure. - */ -static ssize_t set_soft_job_timeout(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int soft_job_timeout_ms; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || - (soft_job_timeout_ms <= 0)) - return -EINVAL; - - atomic_set(&kbdev->js_data.soft_job_timeout_ms, - soft_job_timeout_ms); - - return count; -} - -/** - * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs - * file. - * - * This will return the timeout for the software jobs. - * - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer for the sysfs file contents. - * - * Return: The number of bytes output to buf. - */ -static ssize_t show_soft_job_timeout(struct device *dev, - struct device_attribute *attr, - char * const buf) -{ - struct kbase_device *kbdev; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - return scnprintf(buf, PAGE_SIZE, "%i\n", - atomic_read(&kbdev->js_data.soft_job_timeout_ms)); -} - -static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, - show_soft_job_timeout, set_soft_job_timeout); - -static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, - int default_ticks, u32 old_ticks) -{ - if (timeout_ms > 0) { - u64 ticks = timeout_ms * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - if (!ticks) - return 1; - return ticks; - } else if (timeout_ms < 0) { - return default_ticks; - } else { - return old_ticks; - } -} - -/** - * set_js_timeouts - Store callback for the js_timeouts sysfs file. - * - * This function is called to get the contents of the js_timeouts sysfs - * file. This file contains five values separated by whitespace. The values - * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS, - * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING - * configuration values (in that order), with the difference that the js_timeout - * values are expressed in MILLISECONDS. - * - * The js_timeouts sysfile file allows the current values in - * use by the job scheduler to get override. Note that a value needs to - * be other than 0 for it to override the current job scheduler value. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * Return: @count if the function succeeded. An error code on failure. 
- */ -static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int items; - long js_soft_stop_ms; - long js_soft_stop_ms_cl; - long js_hard_stop_ms_ss; - long js_hard_stop_ms_cl; - long js_hard_stop_ms_dumping; - long js_reset_ms_ss; - long js_reset_ms_cl; - long js_reset_ms_dumping; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", - &js_soft_stop_ms, &js_soft_stop_ms_cl, - &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, - &js_hard_stop_ms_dumping, &js_reset_ms_ss, - &js_reset_ms_cl, &js_reset_ms_dumping); - - if (items == 8) { - struct kbasep_js_device_data *js_data = &kbdev->js_data; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - -#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ - js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ - default, js_data->ticks_name); \ - dev_dbg(kbdev->dev, "Overriding " #ticks_name \ - " with %lu ticks (%lu ms)\n", \ - (unsigned long)js_data->ticks_name, \ - ms_name); \ - } while (0) - - UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, - DEFAULT_JS_SOFT_STOP_TICKS); - UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, - DEFAULT_JS_SOFT_STOP_TICKS_CL); - UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? - DEFAULT_JS_HARD_STOP_TICKS_SS_8408 : - DEFAULT_JS_HARD_STOP_TICKS_SS); - UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, - DEFAULT_JS_HARD_STOP_TICKS_CL); - UPDATE_TIMEOUT(hard_stop_ticks_dumping, - js_hard_stop_ms_dumping, - DEFAULT_JS_HARD_STOP_TICKS_DUMPING); - UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? - DEFAULT_JS_RESET_TICKS_SS_8408 : - DEFAULT_JS_RESET_TICKS_SS); - UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, - DEFAULT_JS_RESET_TICKS_CL); - UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, - DEFAULT_JS_RESET_TICKS_DUMPING); - - kbase_js_set_timeouts(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return count; - } - - dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" - "Use format \n" - "Write 0 for no change, -1 to restore default timeout\n"); - return -EINVAL; -} - -static unsigned long get_js_timeout_in_ms( - u32 scheduling_period_ns, - u32 ticks) -{ - u64 ms = (u64)ticks * scheduling_period_ns; - - do_div(ms, 1000000UL); - return ms; -} - -/** - * show_js_timeouts - Show callback for the js_timeouts sysfs file. - * - * This function is called to get the contents of the js_timeouts sysfs - * file. It returns the last set values written to the js_timeouts sysfs file. - * If the file didn't get written yet, the values will be current setting in - * use. - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. 
- */ -static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - unsigned long js_soft_stop_ms; - unsigned long js_soft_stop_ms_cl; - unsigned long js_hard_stop_ms_ss; - unsigned long js_hard_stop_ms_cl; - unsigned long js_hard_stop_ms_dumping; - unsigned long js_reset_ms_ss; - unsigned long js_reset_ms_cl; - unsigned long js_reset_ms_dumping; - u32 scheduling_period_ns; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - scheduling_period_ns = kbdev->js_data.scheduling_period_ns; - -#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ - scheduling_period_ns, \ - kbdev->js_data.name) - - js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); - js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); - js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); - js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); - js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); - js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); - js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); - js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); - -#undef GET_TIMEOUT - - ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", - js_soft_stop_ms, js_soft_stop_ms_cl, - js_hard_stop_ms_ss, js_hard_stop_ms_cl, - js_hard_stop_ms_dumping, js_reset_ms_ss, - js_reset_ms_cl, js_reset_ms_dumping); - - if (ret >= PAGE_SIZE) { - buf[PAGE_SIZE - 2] = '\n'; - buf[PAGE_SIZE - 1] = '\0'; - ret = PAGE_SIZE - 1; - } - - return ret; -} - -/* - * The sysfs file js_timeouts. - * - * This is used to override the current job scheduler values for - * JS_STOP_STOP_TICKS_SS - * JS_STOP_STOP_TICKS_CL - * JS_HARD_STOP_TICKS_SS - * JS_HARD_STOP_TICKS_CL - * JS_HARD_STOP_TICKS_DUMPING - * JS_RESET_TICKS_SS - * JS_RESET_TICKS_CL - * JS_RESET_TICKS_DUMPING. - */ -static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); - -static u32 get_new_js_timeout( - u32 old_period, - u32 old_ticks, - u32 new_scheduling_period_ns) -{ - u64 ticks = (u64)old_period * (u64)old_ticks; - do_div(ticks, new_scheduling_period_ns); - return ticks?ticks:1; -} - -/** - * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs - * file - * @dev: The device the sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * This function is called when the js_scheduling_period sysfs file is written - * to. It checks the data written, and if valid updates the js_scheduling_period - * value - * - * Return: @count if the function succeeded. An error code on failure. 
- */ -static ssize_t set_js_scheduling_period(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int ret; - unsigned int js_scheduling_period; - u32 new_scheduling_period_ns; - u32 old_period; - struct kbasep_js_device_data *js_data; - unsigned long flags; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - js_data = &kbdev->js_data; - - ret = kstrtouint(buf, 0, &js_scheduling_period); - if (ret || !js_scheduling_period) { - dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" - "Use format \n"); - return -EINVAL; - } - - new_scheduling_period_ns = js_scheduling_period * 1000000; - - /* Update scheduling timeouts */ - mutex_lock(&js_data->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* If no contexts have been scheduled since js_timeouts was last written - * to, the new timeouts might not have been latched yet. So check if an - * update is pending and use the new values if necessary. */ - - /* Use previous 'new' scheduling period as a base if present. */ - old_period = js_data->scheduling_period_ns; - -#define SET_TIMEOUT(name) \ - (js_data->name = get_new_js_timeout(\ - old_period, \ - kbdev->js_data.name, \ - new_scheduling_period_ns)) - - SET_TIMEOUT(soft_stop_ticks); - SET_TIMEOUT(soft_stop_ticks_cl); - SET_TIMEOUT(hard_stop_ticks_ss); - SET_TIMEOUT(hard_stop_ticks_cl); - SET_TIMEOUT(hard_stop_ticks_dumping); - SET_TIMEOUT(gpu_reset_ticks_ss); - SET_TIMEOUT(gpu_reset_ticks_cl); - SET_TIMEOUT(gpu_reset_ticks_dumping); - -#undef SET_TIMEOUT - - js_data->scheduling_period_ns = new_scheduling_period_ns; - - kbase_js_set_timeouts(kbdev); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&js_data->runpool_mutex); - - dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", - js_scheduling_period); - - return count; -} - -/** - * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs - * entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the GPU information. - * - * This function is called to get the current period used for the JS scheduling - * period. - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_js_scheduling_period(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - u32 period; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - period = kbdev->js_data.scheduling_period_ns; - - ret = scnprintf(buf, PAGE_SIZE, "%d\n", - period / 1000000); - - return ret; -} - -static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR, - show_js_scheduling_period, set_js_scheduling_period); - -#if !MALI_CUSTOMER_RELEASE -/** - * set_force_replay - Store callback for the force_replay sysfs file. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * Return: @count if the function succeeded. An error code on failure. 
- */ -static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - if (!strncmp("limit=", buf, MIN(6, count))) { - int force_replay_limit; - int items = sscanf(buf, "limit=%u", &force_replay_limit); - - if (items == 1) { - kbdev->force_replay_random = false; - kbdev->force_replay_limit = force_replay_limit; - kbdev->force_replay_count = 0; - - return count; - } - } else if (!strncmp("random_limit", buf, MIN(12, count))) { - kbdev->force_replay_random = true; - kbdev->force_replay_count = 0; - - return count; - } else if (!strncmp("norandom_limit", buf, MIN(14, count))) { - kbdev->force_replay_random = false; - kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED; - kbdev->force_replay_count = 0; - - return count; - } else if (!strncmp("core_req=", buf, MIN(9, count))) { - unsigned int core_req; - int items = sscanf(buf, "core_req=%x", &core_req); - - if (items == 1) { - kbdev->force_replay_core_req = (base_jd_core_req)core_req; - - return count; - } - } - dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=, random_limit, norandom_limit, core_req=\n"); - return -EINVAL; -} - -/** - * show_force_replay - Show callback for the force_replay sysfs file. - * - * This function is called to get the contents of the force_replay sysfs - * file. It returns the last set value written to the force_replay sysfs file. - * If the file didn't get written yet, the values will be 0. - * - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_force_replay(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - if (kbdev->force_replay_random) - ret = scnprintf(buf, PAGE_SIZE, - "limit=0\nrandom_limit\ncore_req=%x\n", - kbdev->force_replay_core_req); - else - ret = scnprintf(buf, PAGE_SIZE, - "limit=%u\nnorandom_limit\ncore_req=%x\n", - kbdev->force_replay_limit, - kbdev->force_replay_core_req); - - if (ret >= PAGE_SIZE) { - buf[PAGE_SIZE - 2] = '\n'; - buf[PAGE_SIZE - 1] = '\0'; - ret = PAGE_SIZE - 1; - } - - return ret; -} - -/* - * The sysfs file force_replay. - */ -static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay, - set_force_replay); -#endif /* !MALI_CUSTOMER_RELEASE */ - -#ifdef CONFIG_MALI_DEBUG -static ssize_t set_js_softstop_always(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int ret; - int softstop_always; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = kstrtoint(buf, 0, &softstop_always); - if (ret || ((softstop_always != 0) && (softstop_always != 1))) { - dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" - "Use format \n"); - return -EINVAL; - } - - kbdev->js_data.softstop_always = (bool) softstop_always; - dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", - (kbdev->js_data.softstop_always) ? 
- "Enabled" : "Disabled"); - return count; -} - -static ssize_t show_js_softstop_always(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); - - if (ret >= PAGE_SIZE) { - buf[PAGE_SIZE - 2] = '\n'; - buf[PAGE_SIZE - 1] = '\0'; - ret = PAGE_SIZE - 1; - } - - return ret; -} - -/* - * By default, soft-stops are disabled when only a single context is present. - * The ability to enable soft-stop when only a single context is present can be - * used for debug and unit-testing purposes. - * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) - */ -static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); -#endif /* CONFIG_MALI_DEBUG */ - -#ifdef CONFIG_MALI_DEBUG -typedef void (kbasep_debug_command_func) (struct kbase_device *); - -enum kbasep_debug_command_code { - KBASEP_DEBUG_COMMAND_DUMPTRACE, - - /* This must be the last enum */ - KBASEP_DEBUG_COMMAND_COUNT -}; - -struct kbasep_debug_command { - char *str; - kbasep_debug_command_func *func; -}; - -/* Debug commands supported by the driver */ -static const struct kbasep_debug_command debug_commands[] = { - { - .str = "dumptrace", - .func = &kbasep_trace_dump, - } -}; - -/** - * show_debug - Show callback for the debug_command sysfs file. - * - * This function is called to get the contents of the debug_command sysfs - * file. This is a list of the available debug commands, separated by newlines. - * - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - int i; - ssize_t ret = 0; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); - - if (ret >= PAGE_SIZE) { - buf[PAGE_SIZE - 2] = '\n'; - buf[PAGE_SIZE - 1] = '\0'; - ret = PAGE_SIZE - 1; - } - - return ret; -} - -/** - * issue_debug - Store callback for the debug_command sysfs file. - * - * This function is called when the debug_command sysfs file is written to. - * It matches the requested command against the available commands, and if - * a matching command is found calls the associated function from - * @debug_commands to issue the command. - * - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int i; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { - if (sysfs_streq(debug_commands[i].str, buf)) { - debug_commands[i].func(kbdev); - return count; - } - } - - /* Debug Command not found */ - dev_err(dev, "debug_command: command not known\n"); - return -EINVAL; -} - -/* The sysfs file debug_command. 
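The debug_commands[] table above is a plain string-to-handler dispatch, the same shape that issue_debug() walks when the file is written. A minimal standalone sketch of that pattern, with illustrative names (the driver itself matches with sysfs_streq(), which also tolerates a trailing newline):

#include <stdio.h>
#include <string.h>

struct demo_device;                     /* stand-in for struct kbase_device */

struct demo_command {
        const char *str;
        void (*func)(struct demo_device *);
};

static void demo_dumptrace(struct demo_device *dev) { (void)dev; }

static const struct demo_command demo_commands[] = {
        { .str = "dumptrace", .func = demo_dumptrace },
};

/* Same shape as issue_debug(): compare the written string against each
 * table entry and run the matching handler, else report an error. */
static int demo_issue(struct demo_device *dev, const char *buf)
{
        size_t i;

        for (i = 0; i < sizeof(demo_commands) / sizeof(demo_commands[0]); i++) {
                if (!strcmp(demo_commands[i].str, buf)) {
                        demo_commands[i].func(dev);
                        return 0;
                }
        }
        fprintf(stderr, "debug_command: command not known\n");
        return -1;
}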
- * - * This is used to issue general debug commands to the device driver. - * Reading it will produce a list of debug commands, separated by newlines. - * Writing to it with one of those commands will issue said command. - */ -static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); -#endif /* CONFIG_MALI_DEBUG */ - -/** - * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the GPU information. - * - * This function is called to get a description of the present Mali - * GPU via the gpuinfo sysfs entry. This includes the GPU family, the - * number of cores, the hardware version and the raw product id. For - * example - * - * Mali-T60x MP4 r0p0 0x6956 - * - * Return: The number of bytes output to @buf. - */ -static ssize_t kbase_show_gpuinfo(struct device *dev, - struct device_attribute *attr, char *buf) -{ - static const struct gpu_product_id_name { - unsigned id; - char *name; - } gpu_product_id_names[] = { - { .id = GPU_ID_PI_T60X, .name = "Mali-T60x" }, - { .id = GPU_ID_PI_T62X, .name = "Mali-T62x" }, - { .id = GPU_ID_PI_T72X, .name = "Mali-T72x" }, - { .id = GPU_ID_PI_T76X, .name = "Mali-T76x" }, - { .id = GPU_ID_PI_T82X, .name = "Mali-T82x" }, - { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, - { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, - { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, - { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G71" }, - { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G72" }, - { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G51" }, - { .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G76" }, - { .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G31" }, - { .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G52" }, - }; - const char *product_name = "(Unknown Mali GPU)"; - struct kbase_device *kbdev; - u32 gpu_id; - unsigned product_id, product_id_mask; - unsigned i; - bool is_new_format; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - is_new_format = GPU_ID_IS_NEW_FORMAT(product_id); - product_id_mask = - (is_new_format ? - GPU_ID2_PRODUCT_MODEL : - GPU_ID_VERSION_PRODUCT_ID) >> - GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { - const struct gpu_product_id_name *p = &gpu_product_id_names[i]; - - if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) && - (p->id & product_id_mask) == - (product_id & product_id_mask)) { - product_name = p->name; - break; - } - } - - return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", - product_name, kbdev->gpu_props.num_cores, - (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, - product_id); -} -static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL); - -/** - * set_dvfs_period - Store callback for the dvfs_period sysfs file. - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * This function is called when the dvfs_period sysfs file is written to. 
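From user space, the gpuinfo attribute produced by kbase_show_gpuinfo() above is a single line of the form "<name> <n> cores r<major>p<minor> 0x<product>"; note that the scnprintf() format emits "<n> cores" rather than the older "MPn" style quoted in the comment's example. A hedged sketch of parsing it, with the sysfs path supplied by the caller:

#include <stdio.h>

/* Illustrative reader for the gpuinfo attribute; assumes a recognised GPU,
 * so the product name contains no spaces (the "(Unknown Mali GPU)"
 * fallback would not match this pattern). */
static int read_gpuinfo(const char *path)
{
        char name[32];
        unsigned int cores, major, minor, product;
        FILE *f = fopen(path, "r");

        if (!f)
                return -1;
        if (fscanf(f, "%31s %u cores r%up%u %x",
                   name, &cores, &major, &minor, &product) != 5) {
                fclose(f);
                return -1;
        }
        fclose(f);
        printf("%s: %u cores, r%up%u, product 0x%04x\n",
               name, cores, major, minor, product);
        return 0;
}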
It - * checks the data written, and if valid updates the DVFS period variable, - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_dvfs_period(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int ret; - int dvfs_period; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = kstrtoint(buf, 0, &dvfs_period); - if (ret || dvfs_period <= 0) { - dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" - "Use format \n"); - return -EINVAL; - } - - kbdev->pm.dvfs_period = dvfs_period; - dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); - - return count; -} - -/** - * show_dvfs_period - Show callback for the dvfs_period sysfs entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the GPU information. - * - * This function is called to get the current period used for the DVFS sample - * timer. - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_dvfs_period(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); - - return ret; -} - -static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period, - set_dvfs_period); - -/** - * set_pm_poweroff - Store callback for the pm_poweroff sysfs file. - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * This function is called when the pm_poweroff sysfs file is written to. - * - * This file contains three values separated by whitespace. The values - * are gpu_poweroff_time (the period of the poweroff timer, in ns), - * poweroff_shader_ticks (the number of poweroff timer ticks before an idle - * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer - * ticks before the GPU is powered off), in that order. - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_pm_poweroff(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - struct kbasep_pm_tick_timer_state *stt; - int items; - u64 gpu_poweroff_time; - unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; - unsigned long flags; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, - &poweroff_shader_ticks, - &poweroff_gpu_ticks); - if (items != 3) { - dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" - "Use format \n"); - return -EINVAL; - } - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - stt = &kbdev->pm.backend.shader_tick_timer; - stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); - stt->configured_ticks = poweroff_shader_ticks; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (poweroff_gpu_ticks != 0) - dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n"); - - return count; -} - -/** - * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the GPU information. 
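set_pm_poweroff() above accepts three whitespace-separated values, <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>, but only the first two still take effect; a non-zero third field merely triggers the "no longer supported" warning. A hypothetical user-space helper matching that format, with the path supplied by the caller:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper matching the "%llu %u %u" parser above; the third
 * field is written as 0 because it is parsed but no longer used. */
static int write_pm_poweroff(const char *path, unsigned long long interval_ns,
                             unsigned int shader_ticks)
{
        char buf[64];
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return -1;
        snprintf(buf, sizeof(buf), "%llu %u 0", interval_ns, shader_ticks);
        if (write(fd, buf, strlen(buf)) < 0) {
                close(fd);
                return -1;
        }
        return close(fd);
}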
- * - * This function is called to get the current period used for the DVFS sample - * timer. - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_pm_poweroff(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - struct kbasep_pm_tick_timer_state *stt; - ssize_t ret; - unsigned long flags; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - stt = &kbdev->pm.backend.shader_tick_timer; - ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", - ktime_to_ns(stt->configured_interval), - stt->configured_ticks); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, - set_pm_poweroff); - -/** - * set_reset_timeout - Store callback for the reset_timeout sysfs file. - * @dev: The device with sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file - * - * This function is called when the reset_timeout sysfs file is written to. It - * checks the data written, and if valid updates the reset timeout. - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_reset_timeout(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int ret; - int reset_timeout; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = kstrtoint(buf, 0, &reset_timeout); - if (ret || reset_timeout <= 0) { - dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" - "Use format \n"); - return -EINVAL; - } - - kbdev->reset_timeout_ms = reset_timeout; - dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); - - return count; -} - -/** - * show_reset_timeout - Show callback for the reset_timeout sysfs entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the GPU information. - * - * This function is called to get the current reset timeout. - * - * Return: The number of bytes output to @buf. 
- */ -static ssize_t show_reset_timeout(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); - - return ret; -} - -static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, - set_reset_timeout); - - - -static ssize_t show_mem_pool_size(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = scnprintf(buf, PAGE_SIZE, "%zu\n", - kbase_mem_pool_size(&kbdev->mem_pool)); - - return ret; -} - -static ssize_t set_mem_pool_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - size_t new_size; - int err; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - err = kstrtoul(buf, 0, (unsigned long *)&new_size); - if (err) - return err; - - kbase_mem_pool_trim(&kbdev->mem_pool, new_size); - - return count; -} - -static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, - set_mem_pool_size); - -static ssize_t show_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = scnprintf(buf, PAGE_SIZE, "%zu\n", - kbase_mem_pool_max_size(&kbdev->mem_pool)); - - return ret; -} - -static ssize_t set_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - size_t new_max_size; - int err; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - err = kstrtoul(buf, 0, (unsigned long *)&new_max_size); - if (err) - return -EINVAL; - - kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size); - - return count; -} - -static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, - set_mem_pool_max_size); - -/** - * show_lp_mem_pool_size - Show size of the large memory pages pool. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the pool size. - * - * This function is called to get the number of large memory pages which currently populate the kbdev pool. - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_lp_mem_pool_size(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool)); -} - -/** - * set_lp_mem_pool_size - Set size of the large memory pages pool. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. - * - * This function is called to set the number of large memory pages which should populate the kbdev pool. - * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool. - * - * Return: @count if the function succeeded. An error code on failure. 
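The pool attributes above have two distinct semantics: writing mem_pool_size trims or grows the pool to the requested page count via kbase_mem_pool_trim(), while mem_pool_max_size only moves the cap via kbase_mem_pool_set_max_size(). A toy model of that distinction, with hedged assumptions about the real helpers noted in the comments:

#include <stddef.h>

/* Toy model only; the real kbase_mem_pool_* helpers allocate and free
 * physical pages rather than just updating counters. */
struct toy_pool {
        size_t cur_size;        /* pages currently held by the pool */
        size_t max_size;        /* upper bound on cur_size */
};

static void toy_pool_trim(struct toy_pool *pool, size_t new_size)
{
        pool->cur_size = new_size;      /* real code allocates or frees pages */
}

static void toy_pool_set_max_size(struct toy_pool *pool, size_t new_max)
{
        pool->max_size = new_max;
        if (pool->cur_size > new_max)
                pool->cur_size = new_max;       /* assumption: the real helper
                                                 * releases any excess pages */
}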
- */ -static ssize_t set_lp_mem_pool_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - unsigned long new_size; - int err; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - err = kstrtoul(buf, 0, &new_size); - if (err) - return err; - - kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size); - - return count; -} - -static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size, - set_lp_mem_pool_size); - -/** - * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the pool size. - * - * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain. - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_lp_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool)); -} - -/** - * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. - * - * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain. - * - * Return: @count if the function succeeded. An error code on failure. - */ -static ssize_t set_lp_mem_pool_max_size(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - unsigned long new_max_size; - int err; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - err = kstrtoul(buf, 0, &new_max_size); - if (err) - return -EINVAL; - - kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size); - - return count; -} - -static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, - set_lp_mem_pool_max_size); - -/** - * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs - * entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer to receive the context scheduling mode information. - * - * This function is called to get the context scheduling mode being used by JS. - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_js_ctx_scheduling_mode(struct device *dev, - struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); -} - -/** - * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs - * entry. - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. - * - * This function is called when the js_ctx_scheduling_mode sysfs file is written - * to. It checks the data written, and if valid updates the ctx scheduling mode - * being by JS. - * - * Return: @count if the function succeeded. An error code on failure. 
- */ -static ssize_t set_js_ctx_scheduling_mode(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbasep_kctx_list_element *element; - u32 new_js_ctx_scheduling_mode; - struct kbase_device *kbdev; - unsigned long flags; - int ret; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); - if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { - dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" - " write operation.\n" - "Use format \n"); - return -EINVAL; - } - - if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) - return count; - - mutex_lock(&kbdev->kctx_list_lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Update the context priority mode */ - kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; - - /* Adjust priority of all the contexts as per the new mode */ - list_for_each_entry(element, &kbdev->kctx_list, link) - kbase_js_update_ctx_priority(element->kctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->kctx_list_lock); - - dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); - - return count; -} - -static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, - show_js_ctx_scheduling_mode, - set_js_ctx_scheduling_mode); -#ifdef CONFIG_DEBUG_FS - -/* Number of entries in serialize_jobs_settings[] */ -#define NR_SERIALIZE_JOBS_SETTINGS 5 -/* Maximum string length in serialize_jobs_settings[].name */ -#define MAX_SERIALIZE_JOBS_NAME_LEN 16 - -static struct -{ - char *name; - u8 setting; -} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { - {"none", 0}, - {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, - {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, - {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, - {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | - KBASE_SERIALIZE_RESET} -}; - -/** - * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs - * file - * @sfile: seq_file pointer - * @data: Private callback data - * - * This function is called to get the contents of the serialize_jobs debugfs - * file. This is a list of the available settings with the currently active one - * surrounded by square brackets. - * - * Return: 0 on success, or an error code on error - */ -static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) -{ - struct kbase_device *kbdev = sfile->private; - int i; - - CSTD_UNUSED(data); - - for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { - if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) - seq_printf(sfile, "[%s] ", - serialize_jobs_settings[i].name); - else - seq_printf(sfile, "%s ", - serialize_jobs_settings[i].name); - } - - seq_puts(sfile, "\n"); - - return 0; -} - -/** - * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs - * debugfs file. - * @file: File pointer - * @ubuf: User buffer containing data to store - * @count: Number of bytes in user buffer - * @ppos: File position - * - * This function is called when the serialize_jobs debugfs file is written to. - * It matches the requested setting against the available settings and if a - * matching setting is found updates kbdev->serialize_jobs. - * - * Return: @count if the function succeeded. An error code on failure. 
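The serialize_jobs_settings[] table above maps the debugfs keywords onto combinations of the KBASE_SERIALIZE_* flags, so selecting a mode is just a matter of writing one of those keywords to the file. An illustrative helper follows; the debugfs location derives from the mali_debugfs_directory created later from kbdev->devname, but the exact path is platform- and mount-dependent.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Illustrative only: write one of the keywords listed by the table above
 * ("none", "intra-slot", "inter-slot", "full", "full-reset"). */
static int select_serialize_jobs(const char *debugfs_file, const char *mode)
{
        int fd = open(debugfs_file, O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, mode, strlen(mode)) < 0) {
                close(fd);
                return -1;
        }
        return close(fd);
}

/* e.g. select_serialize_jobs("/sys/kernel/debug/mali0/serialize_jobs",
 *                            "full"); */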
- */ -static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, - const char __user *ubuf, size_t count, loff_t *ppos) -{ - struct seq_file *s = file->private_data; - struct kbase_device *kbdev = s->private; - char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; - int i; - bool valid = false; - - CSTD_UNUSED(ppos); - - count = min_t(size_t, sizeof(buf) - 1, count); - if (copy_from_user(buf, ubuf, count)) - return -EFAULT; - - buf[count] = 0; - - for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { - if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { - kbdev->serialize_jobs = - serialize_jobs_settings[i].setting; - valid = true; - break; - } - } - - if (!valid) { - dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); - return -EINVAL; - } - - return count; -} - -/** - * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs - * debugfs file - * @in: inode pointer - * @file: file pointer - * - * Return: Zero on success, error code on failure - */ -static int kbasep_serialize_jobs_debugfs_open(struct inode *in, - struct file *file) -{ - return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); -} - -static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { - .open = kbasep_serialize_jobs_debugfs_open, - .read = seq_read, - .write = kbasep_serialize_jobs_debugfs_write, - .llseek = seq_lseek, - .release = single_release, -}; - -#endif /* CONFIG_DEBUG_FS */ - -static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) -{ - struct kbase_device *kbdev = container_of(data, struct kbase_device, - protected_mode_hwcnt_disable_work); - unsigned long flags; - - bool do_disable; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - do_disable = !kbdev->protected_mode_hwcnt_desired && - !kbdev->protected_mode_hwcnt_disabled; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (!do_disable) - return; - - kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - do_disable = !kbdev->protected_mode_hwcnt_desired && - !kbdev->protected_mode_hwcnt_disabled; - - if (do_disable) { - /* Protected mode state did not change while we were doing the - * disable, so commit the work we just performed and continue - * the state machine. - */ - kbdev->protected_mode_hwcnt_disabled = true; - kbase_backend_slot_update(kbdev); - } else { - /* Protected mode state was updated while we were doing the - * disable, so we need to undo the disable we just performed. 
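The worker above follows a common shape for slow work that depends on spinlock-protected state: sample the state under the lock, drop the lock for the slow operation, then re-take the lock and re-check before committing or undoing. A generic standalone sketch of that shape, using pthreads in place of the kernel primitives:

#include <pthread.h>
#include <stdbool.h>

struct guarded_state {
        pthread_mutex_t lock;   /* stands in for hwaccess_lock */
        bool desired;           /* e.g. protected_mode_hwcnt_desired */
        bool applied;           /* e.g. protected_mode_hwcnt_disabled */
};

static void slow_disable(void) { /* e.g. kbase_hwcnt_context_disable() */ }
static void slow_enable(void)  { /* the undo path */ }

static void worker(struct guarded_state *s)
{
        bool do_disable;

        pthread_mutex_lock(&s->lock);
        do_disable = !s->desired && !s->applied;
        pthread_mutex_unlock(&s->lock);

        if (!do_disable)
                return;

        slow_disable();                 /* may sleep; lock not held */

        pthread_mutex_lock(&s->lock);
        if (!s->desired && !s->applied)
                s->applied = true;      /* state unchanged: commit */
        else
                slow_enable();          /* state changed underneath us: undo */
        pthread_mutex_unlock(&s->lock);
}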
- */ - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -static int kbasep_protected_mode_init(struct kbase_device *kbdev) -{ -#ifdef CONFIG_OF - struct device_node *protected_node; - struct platform_device *pdev; - struct protected_mode_device *protected_dev; -#endif - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - /* Use native protected ops */ - kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), - GFP_KERNEL); - if (!kbdev->protected_dev) - return -ENOMEM; - kbdev->protected_dev->data = kbdev; - kbdev->protected_ops = &kbase_native_protected_ops; - kbdev->protected_mode_support = true; - INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, - kbasep_protected_mode_hwcnt_disable_worker); - kbdev->protected_mode_hwcnt_desired = true; - kbdev->protected_mode_hwcnt_disabled = false; - return 0; - } - - kbdev->protected_mode_support = false; - -#ifdef CONFIG_OF - protected_node = of_parse_phandle(kbdev->dev->of_node, - "protected-mode-switcher", 0); - - if (!protected_node) - protected_node = of_parse_phandle(kbdev->dev->of_node, - "secure-mode-switcher", 0); - - if (!protected_node) { - /* If protected_node cannot be looked up then we assume - * protected mode is not supported on this platform. */ - dev_info(kbdev->dev, "Protected mode not available\n"); - return 0; - } - - pdev = of_find_device_by_node(protected_node); - if (!pdev) - return -EINVAL; - - protected_dev = platform_get_drvdata(pdev); - if (!protected_dev) - return -EPROBE_DEFER; - - kbdev->protected_ops = &protected_dev->ops; - kbdev->protected_dev = protected_dev; - - if (kbdev->protected_ops) { - int err; - - /* Make sure protected mode is disabled on startup */ - mutex_lock(&kbdev->pm.lock); - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); - mutex_unlock(&kbdev->pm.lock); - - /* protected_mode_disable() returns -EINVAL if not supported */ - kbdev->protected_mode_support = (err != -EINVAL); - } -#endif - return 0; -} - -static void kbasep_protected_mode_term(struct kbase_device *kbdev) -{ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); - kfree(kbdev->protected_dev); - } -} - -#ifdef CONFIG_MALI_NO_MALI -static int kbase_common_reg_map(struct kbase_device *kbdev) -{ - return 0; -} -static void kbase_common_reg_unmap(struct kbase_device * const kbdev) -{ -} -#else /* CONFIG_MALI_NO_MALI */ -static int kbase_common_reg_map(struct kbase_device *kbdev) -{ - int err = 0; - - if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { - dev_err(kbdev->dev, "Register window unavailable\n"); - err = -EIO; - goto out_region; - } - - kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); - if (!kbdev->reg) { - dev_err(kbdev->dev, "Can't remap register window\n"); - err = -EINVAL; - goto out_ioremap; - } - - return err; - - out_ioremap: - release_mem_region(kbdev->reg_start, kbdev->reg_size); - out_region: - return err; -} - -static void kbase_common_reg_unmap(struct kbase_device * const kbdev) -{ - if (kbdev->reg) { - iounmap(kbdev->reg); - release_mem_region(kbdev->reg_start, kbdev->reg_size); - kbdev->reg = NULL; - kbdev->reg_start = 0; - kbdev->reg_size = 0; - } -} -#endif /* CONFIG_MALI_NO_MALI */ - -static int registers_map(struct kbase_device * const kbdev) -{ - - /* the first memory resource is the physical address of the GPU - * registers */ - struct platform_device *pdev = 
to_platform_device(kbdev->dev); - struct resource *reg_res; - int err; - - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - return -ENOENT; - } - - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); - - - err = kbase_common_reg_map(kbdev); - if (err) { - dev_err(kbdev->dev, "Failed to map registers\n"); - return err; - } - - return 0; -} - -static void registers_unmap(struct kbase_device *kbdev) -{ - kbase_common_reg_unmap(kbdev); -} - -static int power_control_init(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - int err = 0; - - if (!kbdev) - return -ENODEV; - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); - if (IS_ERR_OR_NULL(kbdev->regulator)) { - err = PTR_ERR(kbdev->regulator); - kbdev->regulator = NULL; - if (err == -EPROBE_DEFER) { - dev_err(&pdev->dev, "Failed to get regulator\n"); - return err; - } - dev_info(kbdev->dev, - "Continuing without Mali regulator control\n"); - /* Allow probe to continue without regulator */ - } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - - kbdev->clock = of_clk_get(kbdev->dev->of_node, 0); - if (IS_ERR_OR_NULL(kbdev->clock)) { - err = PTR_ERR(kbdev->clock); - kbdev->clock = NULL; - if (err == -EPROBE_DEFER) { - dev_err(&pdev->dev, "Failed to get clock\n"); - goto fail; - } - dev_info(kbdev->dev, "Continuing without Mali clock control\n"); - /* Allow probe to continue without clock. */ - } else { - err = clk_prepare_enable(kbdev->clock); - if (err) { - dev_err(kbdev->dev, - "Failed to prepare and enable clock (%d)\n", - err); - goto fail; - } - } - -#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) - /* Register the OPPs if they are available in device tree */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \ - || defined(LSK_OPPV2_BACKPORT) - err = dev_pm_opp_of_add_table(kbdev->dev); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) - err = of_init_opp_table(kbdev->dev); -#else - err = 0; -#endif /* LINUX_VERSION_CODE */ - if (err) - dev_dbg(kbdev->dev, "OPP table not found\n"); -#endif /* CONFIG_OF && CONFIG_PM_OPP */ - - return 0; - -fail: - -if (kbdev->clock != NULL) { - clk_put(kbdev->clock); - kbdev->clock = NULL; -} - -#ifdef CONFIG_REGULATOR - if (NULL != kbdev->regulator) { - regulator_put(kbdev->regulator); - kbdev->regulator = NULL; - } -#endif - - return err; -} - -static void power_control_term(struct kbase_device *kbdev) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \ - defined(LSK_OPPV2_BACKPORT) - dev_pm_opp_of_remove_table(kbdev->dev); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) - of_free_opp_table(kbdev->dev); -#endif - - if (kbdev->clock) { - clk_disable_unprepare(kbdev->clock); - clk_put(kbdev->clock); - kbdev->clock = NULL; - } - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - if (kbdev->regulator) { - regulator_put(kbdev->regulator); - kbdev->regulator = NULL; - } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -} - -#ifdef MALI_KBASE_BUILD -#ifdef CONFIG_DEBUG_FS - -#include - -static void trigger_quirks_reload(struct kbase_device *kbdev) -{ - kbase_pm_context_active(kbdev); - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - kbase_pm_context_idle(kbdev); -} - -#define MAKE_QUIRK_ACCESSORS(type) \ -static int type##_quirks_set(void *data, 
u64 val) \ -{ \ - struct kbase_device *kbdev; \ - kbdev = (struct kbase_device *)data; \ - kbdev->hw_quirks_##type = (u32)val; \ - trigger_quirks_reload(kbdev); \ - return 0;\ -} \ -\ -static int type##_quirks_get(void *data, u64 *val) \ -{ \ - struct kbase_device *kbdev;\ - kbdev = (struct kbase_device *)data;\ - *val = kbdev->hw_quirks_##type;\ - return 0;\ -} \ -DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ - type##_quirks_set, "%llu\n") - -MAKE_QUIRK_ACCESSORS(sc); -MAKE_QUIRK_ACCESSORS(tiler); -MAKE_QUIRK_ACCESSORS(mmu); -MAKE_QUIRK_ACCESSORS(jm); - - -/** - * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read - * @file: File object to read is for - * @buf: User buffer to populate with data - * @len: Length of user buffer - * @ppos: Offset within file object - * - * Retrieves the current status of protected debug mode - * (0 = disabled, 1 = enabled) - * - * Return: Number of bytes added to user buffer - */ -static ssize_t debugfs_protected_debug_mode_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) -{ - struct kbase_device *kbdev = (struct kbase_device *)file->private_data; - u32 gpu_status; - ssize_t ret_val; - - kbase_pm_context_active(kbdev); - gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); - kbase_pm_context_idle(kbdev); - - if (gpu_status & GPU_DBGEN) - ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); - else - ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); - - return ret_val; -} - -/* - * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops - * - * Contains the file operations for the "protected_debug_mode" debugfs file - */ -static const struct file_operations fops_protected_debug_mode = { - .open = simple_open, - .read = debugfs_protected_debug_mode_read, - .llseek = default_llseek, -}; - -static int kbase_device_debugfs_init(struct kbase_device *kbdev) -{ - struct dentry *debugfs_ctx_defaults_directory; - int err; - - kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, - NULL); - if (!kbdev->mali_debugfs_directory) { - dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); - err = -ENOMEM; - goto out; - } - - kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", - kbdev->mali_debugfs_directory); - if (!kbdev->debugfs_ctx_directory) { - dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); - err = -ENOMEM; - goto out; - } - - debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", - kbdev->debugfs_ctx_directory); - if (!debugfs_ctx_defaults_directory) { - dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); - err = -ENOMEM; - goto out; - } - -#if !MALI_CUSTOMER_RELEASE - kbasep_regs_dump_debugfs_init(kbdev); -#endif /* !MALI_CUSTOMER_RELEASE */ - kbasep_regs_history_debugfs_init(kbdev); - - kbase_debug_job_fault_debugfs_init(kbdev); - kbasep_gpu_memory_debugfs_init(kbdev); - kbase_as_fault_debugfs_init(kbdev); - /* fops_* variables created by invocations of macro - * MAKE_QUIRK_ACCESSORS() above. 
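For readability, this is roughly what a single MAKE_QUIRK_ACCESSORS(sc) invocation above expands to once the token pasting is applied (reformatted, behaviour unchanged):

static int sc_quirks_set(void *data, u64 val)
{
        struct kbase_device *kbdev = (struct kbase_device *)data;

        kbdev->hw_quirks_sc = (u32)val;
        trigger_quirks_reload(kbdev);   /* reset the GPU so the value is applied */
        return 0;
}

static int sc_quirks_get(void *data, u64 *val)
{
        struct kbase_device *kbdev = (struct kbase_device *)data;

        *val = kbdev->hw_quirks_sc;
        return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_sc_quirks, sc_quirks_get, sc_quirks_set, "%llu\n");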
*/ - debugfs_create_file("quirks_sc", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_sc_quirks); - debugfs_create_file("quirks_tiler", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_tiler_quirks); - debugfs_create_file("quirks_mmu", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_mmu_quirks); - debugfs_create_file("quirks_jm", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_jm_quirks); - - debugfs_create_bool("infinite_cache", 0644, - debugfs_ctx_defaults_directory, - &kbdev->infinite_cache_active_default); - - debugfs_create_size_t("mem_pool_max_size", 0644, - debugfs_ctx_defaults_directory, - &kbdev->mem_pool_max_size_default); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { - debugfs_create_file("protected_debug_mode", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &fops_protected_debug_mode); - } - -#if KBASE_TRACE_ENABLE - kbasep_trace_debugfs_init(kbdev); -#endif /* KBASE_TRACE_ENABLE */ - -#ifdef CONFIG_MALI_DEVFREQ -#ifdef CONFIG_DEVFREQ_THERMAL - if (kbdev->inited_subsys & inited_devfreq) - kbase_ipa_debugfs_init(kbdev); -#endif /* CONFIG_DEVFREQ_THERMAL */ -#endif /* CONFIG_MALI_DEVFREQ */ - -#ifdef CONFIG_DEBUG_FS - debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_serialize_jobs_debugfs_fops); -#endif /* CONFIG_DEBUG_FS */ - - return 0; - -out: - debugfs_remove_recursive(kbdev->mali_debugfs_directory); - return err; -} - -static void kbase_device_debugfs_term(struct kbase_device *kbdev) -{ - debugfs_remove_recursive(kbdev->mali_debugfs_directory); -} - -#else /* CONFIG_DEBUG_FS */ -static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) -{ - return 0; -} - -static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } -#endif /* CONFIG_DEBUG_FS */ -#endif /* MALI_KBASE_BUILD */ - -static void kbase_device_coherency_init(struct kbase_device *kbdev, - unsigned prod_id) -{ -#ifdef CONFIG_OF - u32 supported_coherency_bitmap = - kbdev->gpu_props.props.raw_props.coherency_mode; - const void *coherency_override_dts; - u32 override_coherency; - - /* Only for tMIx : - * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly - * documented for tMIx so force correct value here. - */ - if (GPU_ID_IS_NEW_FORMAT(prod_id) && - (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == - GPU_ID2_PRODUCT_TMIX)) - if (supported_coherency_bitmap == - COHERENCY_FEATURE_BIT(COHERENCY_ACE)) - supported_coherency_bitmap |= - COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); - -#endif /* CONFIG_OF */ - - kbdev->system_coherency = COHERENCY_NONE; - - /* device tree may override the coherency */ -#ifdef CONFIG_OF - coherency_override_dts = of_get_property(kbdev->dev->of_node, - "system-coherency", - NULL); - if (coherency_override_dts) { - - override_coherency = be32_to_cpup(coherency_override_dts); - - if ((override_coherency <= COHERENCY_NONE) && - (supported_coherency_bitmap & - COHERENCY_FEATURE_BIT(override_coherency))) { - - kbdev->system_coherency = override_coherency; - - dev_info(kbdev->dev, - "Using coherency mode %u set from dtb", - override_coherency); - } else - dev_warn(kbdev->dev, - "Ignoring unsupported coherency mode %u set from dtb", - override_coherency); - } - -#endif /* CONFIG_OF */ - - kbdev->gpu_props.props.raw_props.coherency_mode = - kbdev->system_coherency; -} - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - -/* Callback used by the kbase bus logger client, to initiate a GPU reset - * when the bus log is restarted. 
GPU reset is used as reference point - * in HW bus log analyses. - */ -static void kbase_logging_started_cb(void *data) -{ - struct kbase_device *kbdev = (struct kbase_device *)data; - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); -} -#endif - -static struct attribute *kbase_attrs[] = { -#ifdef CONFIG_MALI_DEBUG - &dev_attr_debug_command.attr, - &dev_attr_js_softstop_always.attr, -#endif -#if !MALI_CUSTOMER_RELEASE - &dev_attr_force_replay.attr, -#endif - &dev_attr_js_timeouts.attr, - &dev_attr_soft_job_timeout.attr, - &dev_attr_gpuinfo.attr, - &dev_attr_dvfs_period.attr, - &dev_attr_pm_poweroff.attr, - &dev_attr_reset_timeout.attr, - &dev_attr_js_scheduling_period.attr, - &dev_attr_power_policy.attr, - &dev_attr_core_mask.attr, - &dev_attr_mem_pool_size.attr, - &dev_attr_mem_pool_max_size.attr, - &dev_attr_lp_mem_pool_size.attr, - &dev_attr_lp_mem_pool_max_size.attr, - &dev_attr_js_ctx_scheduling_mode.attr, - NULL -}; - -static const struct attribute_group kbase_attr_group = { - .attrs = kbase_attrs, -}; - -static int kbase_platform_device_remove(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - const struct list_head *dev_list; - - if (!kbdev) - return -ENODEV; - - kfree(kbdev->gpu_props.prop_buffer); - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - if (kbdev->inited_subsys & inited_buslogger) { - bl_core_client_unregister(kbdev->buslogger); - kbdev->inited_subsys &= ~inited_buslogger; - } -#endif - - - if (kbdev->inited_subsys & inited_dev_list) { - dev_list = kbase_dev_list_get(); - list_del(&kbdev->entry); - kbase_dev_list_put(dev_list); - kbdev->inited_subsys &= ~inited_dev_list; - } - - if (kbdev->inited_subsys & inited_misc_register) { - misc_deregister(&kbdev->mdev); - kbdev->inited_subsys &= ~inited_misc_register; - } - - if (kbdev->inited_subsys & inited_sysfs_group) { - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); - kbdev->inited_subsys &= ~inited_sysfs_group; - } - - if (kbdev->inited_subsys & inited_get_device) { - put_device(kbdev->dev); - kbdev->inited_subsys &= ~inited_get_device; - } - -#ifdef MALI_KBASE_BUILD - if (kbdev->inited_subsys & inited_debugfs) { - kbase_device_debugfs_term(kbdev); - kbdev->inited_subsys &= ~inited_debugfs; - } -#endif - - if (kbdev->inited_subsys & inited_job_fault) { - kbase_debug_job_fault_dev_term(kbdev); - kbdev->inited_subsys &= ~inited_job_fault; - } - -#ifdef CONFIG_MALI_DEVFREQ - if (kbdev->inited_subsys & inited_devfreq) { - kbase_devfreq_term(kbdev); - kbdev->inited_subsys &= ~inited_devfreq; - } -#endif - - - if (kbdev->inited_subsys & inited_backend_late) { - kbase_backend_late_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_late; - } - - if (kbdev->inited_subsys & inited_vinstr) { - kbase_vinstr_term(kbdev->vinstr_ctx); - kbdev->inited_subsys &= ~inited_vinstr; - } - - if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) { - kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); - kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt; - } - - if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) { - kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); - kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx; - } - - if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) { - kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); - kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface; - } - - if (kbdev->inited_subsys & inited_tlstream) { - kbase_tlstream_term(); - kbdev->inited_subsys &= ~inited_tlstream; - } - - /* Bring job and mem sys 
to a halt before we continue termination */ - - if (kbdev->inited_subsys & inited_js) - kbasep_js_devdata_halt(kbdev); - - if (kbdev->inited_subsys & inited_mem) - kbase_mem_halt(kbdev); - - if (kbdev->inited_subsys & inited_protected) { - kbasep_protected_mode_term(kbdev); - kbdev->inited_subsys &= ~inited_protected; - } - - if (kbdev->inited_subsys & inited_js) { - kbasep_js_devdata_term(kbdev); - kbdev->inited_subsys &= ~inited_js; - } - - if (kbdev->inited_subsys & inited_mem) { - kbase_mem_term(kbdev); - kbdev->inited_subsys &= ~inited_mem; - } - - if (kbdev->inited_subsys & inited_ctx_sched) { - kbase_ctx_sched_term(kbdev); - kbdev->inited_subsys &= ~inited_ctx_sched; - } - - if (kbdev->inited_subsys & inited_device) { - kbase_device_term(kbdev); - kbdev->inited_subsys &= ~inited_device; - } - - if (kbdev->inited_subsys & inited_backend_early) { - kbase_backend_early_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_early; - } - - if (kbdev->inited_subsys & inited_io_history) { - kbase_io_history_term(&kbdev->io_history); - kbdev->inited_subsys &= ~inited_io_history; - } - - if (kbdev->inited_subsys & inited_power_control) { - power_control_term(kbdev); - kbdev->inited_subsys &= ~inited_power_control; - } - - if (kbdev->inited_subsys & inited_registers_map) { - registers_unmap(kbdev); - kbdev->inited_subsys &= ~inited_registers_map; - } - -#ifdef CONFIG_MALI_NO_MALI - if (kbdev->inited_subsys & inited_gpu_device) { - gpu_device_destroy(kbdev); - kbdev->inited_subsys &= ~inited_gpu_device; - } -#endif /* CONFIG_MALI_NO_MALI */ - - if (kbdev->inited_subsys != 0) - dev_err(kbdev->dev, "Missing sub system termination\n"); - - kbase_device_free(kbdev); - - return 0; -} - - -/* Number of register accesses for the buffer that we allocate during - * initialization time. The buffer size can be changed later via debugfs. 
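kbase_platform_device_remove() above and kbase_platform_device_probe() below coordinate through the inited_subsys bitmask: probe sets a flag after each subsystem comes up, remove tears down only the flagged subsystems in roughly reverse order, and a failing probe simply calls the remove path to unwind. A condensed sketch of that pattern with illustrative flag and subsystem names:

#include <stdbool.h>

#define DEMO_INITED_A   (1u << 0)
#define DEMO_INITED_B   (1u << 1)

struct demo_dev {
        unsigned int inited_subsys;     /* assumed zeroed by the caller */
};

static bool a_init(struct demo_dev *d) { (void)d; return true; }
static void a_term(struct demo_dev *d) { (void)d; }
static bool b_init(struct demo_dev *d) { (void)d; return true; }
static void b_term(struct demo_dev *d) { (void)d; }

static void demo_remove(struct demo_dev *d)
{
        /* Tear down in reverse of the probe order, only what was set up. */
        if (d->inited_subsys & DEMO_INITED_B) {
                b_term(d);
                d->inited_subsys &= ~DEMO_INITED_B;
        }
        if (d->inited_subsys & DEMO_INITED_A) {
                a_term(d);
                d->inited_subsys &= ~DEMO_INITED_A;
        }
}

static int demo_probe(struct demo_dev *d)
{
        if (!a_init(d))
                goto fail;
        d->inited_subsys |= DEMO_INITED_A;

        if (!b_init(d))
                goto fail;
        d->inited_subsys |= DEMO_INITED_B;

        return 0;
fail:
        /* As in kbase, probe failure reuses the remove path for unwinding. */
        demo_remove(d);
        return -1;
}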
*/ -#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) - -static int kbase_platform_device_probe(struct platform_device *pdev) -{ - struct kbase_device *kbdev; - struct mali_base_gpu_core_props *core_props; - u32 gpu_id; - unsigned prod_id; - const struct list_head *dev_list; - int err = 0; - - kbdev = kbase_device_alloc(); - if (!kbdev) { - dev_err(&pdev->dev, "Allocate device failed\n"); - kbase_platform_device_remove(pdev); - return -ENOMEM; - } - - kbdev->dev = &pdev->dev; - dev_set_drvdata(kbdev->dev, kbdev); - -#ifdef CONFIG_MALI_NO_MALI - err = gpu_device_create(kbdev); - if (err) { - dev_err(&pdev->dev, "Dummy model initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_gpu_device; -#endif /* CONFIG_MALI_NO_MALI */ - - err = assign_irqs(pdev); - if (err) { - dev_err(&pdev->dev, "IRQ search failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - - err = registers_map(kbdev); - if (err) { - dev_err(&pdev->dev, "Register map failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_registers_map; - - err = power_control_init(pdev); - if (err) { - dev_err(&pdev->dev, "Power control initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_power_control; - - err = kbase_io_history_init(&kbdev->io_history, - KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); - if (err) { - dev_err(&pdev->dev, "Register access history initialization failed\n"); - kbase_platform_device_remove(pdev); - return -ENOMEM; - } - kbdev->inited_subsys |= inited_io_history; - - err = kbase_backend_early_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Early backend initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_backend_early; - - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr); - - kbase_disjoint_init(kbdev); - - /* obtain max configured gpu frequency, if devfreq is enabled then - * this will be overridden by the highest operating point found - */ - core_props = &(kbdev->gpu_props.props.core_props); -#ifdef GPU_FREQ_KHZ_MAX - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#else - core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; -#endif - - err = kbase_device_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_device; - - err = kbase_ctx_sched_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Context scheduler initialization failed (%d)\n", - err); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_ctx_sched; - - err = kbase_mem_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_mem; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - kbase_device_coherency_init(kbdev, prod_id); - - err = kbasep_protected_mode_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Protected mode subsystem initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_protected; - - dev_list = kbase_dev_list_get(); - list_add(&kbdev->entry, &kbase_dev_list); - kbase_dev_list_put(dev_list); - kbdev->inited_subsys |= 
inited_dev_list; - - err = kbasep_js_devdata_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_js; - - err = kbase_tlstream_init(); - if (err) { - dev_err(kbdev->dev, "Timeline stream initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_tlstream; - - /* Initialize the kctx list. This is used by vinstr. */ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); - - err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); - if (err) { - dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_hwcnt_gpu_iface; - - err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, - &kbdev->hwcnt_gpu_ctx); - if (err) { - dev_err(kbdev->dev, - "GPU hwcnt context initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_hwcnt_gpu_ctx; - - err = kbase_hwcnt_virtualizer_init( - kbdev->hwcnt_gpu_ctx, &kbdev->hwcnt_gpu_virt); - if (err) { - dev_err(kbdev->dev, - "GPU hwcnt virtualizer initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_hwcnt_gpu_virt; - - err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); - if (err) { - dev_err(kbdev->dev, - "Virtual instrumentation initialization failed\n"); - kbase_platform_device_remove(pdev); - return -EINVAL; - } - kbdev->inited_subsys |= inited_vinstr; - - err = kbase_backend_late_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Late backend initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_backend_late; - - - -#ifdef CONFIG_MALI_DEVFREQ - /* Devfreq uses hardware counters, so must be initialized after it. */ - err = kbase_devfreq_init(kbdev); - if (!err) - kbdev->inited_subsys |= inited_devfreq; - else - dev_err(kbdev->dev, "Continuing without devfreq\n"); -#endif /* CONFIG_MALI_DEVFREQ */ - -#ifdef MALI_KBASE_BUILD - err = kbase_debug_job_fault_dev_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Job fault debug initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_job_fault; - - err = kbase_device_debugfs_init(kbdev); - if (err) { - dev_err(kbdev->dev, "DebugFS initialization failed"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_debugfs; - - kbdev->mdev.minor = MISC_DYNAMIC_MINOR; - kbdev->mdev.name = kbdev->devname; - kbdev->mdev.fops = &kbase_fops; - kbdev->mdev.parent = get_device(kbdev->dev); - kbdev->mdev.mode = 0666; - kbdev->inited_subsys |= inited_get_device; - - /* This needs to happen before registering the device with misc_register(), - * otherwise it causes a race condition between registering the device and a - * uevent event being generated for userspace, causing udev rules to run - * which might expect certain sysfs attributes present. As a result of the - * race condition we avoid, some Mali sysfs entries may have appeared to - * udev to not exist. - - * For more information, see - * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the - * paragraph that starts with "Word of warning", currently the second-last - * paragraph. 
- */ - err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); - if (err) { - dev_err(&pdev->dev, "SysFS group creation failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_sysfs_group; - - err = misc_register(&kbdev->mdev); - if (err) { - dev_err(kbdev->dev, "Misc device registration failed for %s\n", - kbdev->devname); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_misc_register; - - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); - if (err == 0) { - kbdev->inited_subsys |= inited_buslogger; - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); - } else { - dev_warn(kbdev->dev, "Bus log client registration failed\n"); - err = 0; - } -#endif - - err = kbase_gpuprops_populate_user_buffer(kbdev); - if (err) { - dev_err(&pdev->dev, "GPU property population failed"); - kbase_platform_device_remove(pdev); - return err; - } - - dev_info(kbdev->dev, - "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - - kbase_dev_nr++; -#endif /* MALI_KBASE_BUILD */ - - return err; -} - -#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE - -/** - * kbase_device_suspend - Suspend callback from the OS. - * - * This is called by Linux when the device should suspend. - * - * @dev: The device to suspend - * - * Return: A standard Linux error code - */ -static int kbase_device_suspend(struct device *dev) -{ - struct kbase_device *kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) - if (kbdev->inited_subsys & inited_devfreq) - devfreq_suspend_device(kbdev->devfreq); -#endif - - kbase_pm_suspend(kbdev); - return 0; -} - -/** - * kbase_device_resume - Resume callback from the OS. - * - * This is called by Linux when the device should resume from suspension. - * - * @dev: The device to resume - * - * Return: A standard Linux error code - */ -static int kbase_device_resume(struct device *dev) -{ - struct kbase_device *kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - kbase_pm_resume(kbdev); - -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) - if (kbdev->inited_subsys & inited_devfreq) - devfreq_resume_device(kbdev->devfreq); -#endif - return 0; -} - -/** - * kbase_device_runtime_suspend - Runtime suspend callback from the OS. - * - * This is called by Linux when the device should prepare for a condition in - * which it will not be able to communicate with the CPU(s) and RAM due to - * power management. - * - * @dev: The device to suspend - * - * Return: A standard Linux error code - */ -#ifdef KBASE_PM_RUNTIME -static int kbase_device_runtime_suspend(struct device *dev) -{ - struct kbase_device *kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) - if (kbdev->inited_subsys & inited_devfreq) - devfreq_suspend_device(kbdev->devfreq); -#endif - - if (kbdev->pm.backend.callback_power_runtime_off) { - kbdev->pm.backend.callback_power_runtime_off(kbdev); - dev_dbg(dev, "runtime suspend\n"); - } - return 0; -} -#endif /* KBASE_PM_RUNTIME */ - -/** - * kbase_device_runtime_resume - Runtime resume callback from the OS. - * - * This is called by Linux when the device should go into a fully active state. 
- * - * @dev: The device to suspend - * - * Return: A standard Linux error code - */ - -#ifdef KBASE_PM_RUNTIME -static int kbase_device_runtime_resume(struct device *dev) -{ - int ret = 0; - struct kbase_device *kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - if (kbdev->pm.backend.callback_power_runtime_on) { - ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); - dev_dbg(dev, "runtime resume\n"); - } - -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) - if (kbdev->inited_subsys & inited_devfreq) - devfreq_resume_device(kbdev->devfreq); -#endif - - return ret; -} -#endif /* KBASE_PM_RUNTIME */ - - -#ifdef KBASE_PM_RUNTIME -/** - * kbase_device_runtime_idle - Runtime idle callback from the OS. - * @dev: The device to suspend - * - * This is called by Linux when the device appears to be inactive and it might - * be placed into a low power state. - * - * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, - * otherwise a standard Linux error code - */ -static int kbase_device_runtime_idle(struct device *dev) -{ - struct kbase_device *kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - /* Use platform specific implementation if it exists. */ - if (kbdev->pm.backend.callback_power_runtime_idle) - return kbdev->pm.backend.callback_power_runtime_idle(kbdev); - - return 0; -} -#endif /* KBASE_PM_RUNTIME */ - -/* The power management operations for the platform driver. - */ -static const struct dev_pm_ops kbase_pm_ops = { - .suspend = kbase_device_suspend, - .resume = kbase_device_resume, -#ifdef KBASE_PM_RUNTIME - .runtime_suspend = kbase_device_runtime_suspend, - .runtime_resume = kbase_device_runtime_resume, - .runtime_idle = kbase_device_runtime_idle, -#endif /* KBASE_PM_RUNTIME */ -}; - -#ifdef CONFIG_OF -static const struct of_device_id kbase_dt_ids[] = { - { .compatible = "arm,malit6xx" }, - { .compatible = "arm,mali-midgard" }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, kbase_dt_ids); -#endif - -static struct platform_driver kbase_platform_driver = { - .probe = kbase_platform_device_probe, - .remove = kbase_platform_device_remove, - .driver = { - .name = kbase_drv_name, - .owner = THIS_MODULE, - .pm = &kbase_pm_ops, - .of_match_table = of_match_ptr(kbase_dt_ids), - }, -}; - -/* - * The driver will not provide a shortcut to create the Mali platform device - * anymore when using Device Tree. - */ -#ifdef CONFIG_OF -module_platform_driver(kbase_platform_driver); -#else - -static int __init kbase_driver_init(void) -{ - int ret; - - ret = kbase_platform_register(); - if (ret) - return ret; - - ret = platform_driver_register(&kbase_platform_driver); - - if (ret) - kbase_platform_unregister(); - - return ret; -} - -static void __exit kbase_driver_exit(void) -{ - platform_driver_unregister(&kbase_platform_driver); - kbase_platform_unregister(); -} - -module_init(kbase_driver_init); -module_exit(kbase_driver_exit); - -#endif /* CONFIG_OF */ - -MODULE_LICENSE("GPL"); -MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ - __stringify(BASE_UK_VERSION_MAJOR) "." 
\ - __stringify(BASE_UK_VERSION_MINOR) ")"); - -#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE) -#define CREATE_TRACE_POINTS -#endif - -#ifdef CONFIG_MALI_GATOR_SUPPORT -/* Create the trace points (otherwise we just get code to call a tracepoint) */ -#include "mali_linux_trace.h" - -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); -EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); - -void kbase_trace_mali_pm_status(u32 event, u64 value) -{ - trace_mali_pm_status(event, value); -} - -void kbase_trace_mali_pm_power_off(u32 event, u64 value) -{ - trace_mali_pm_power_off(event, value); -} - -void kbase_trace_mali_pm_power_on(u32 event, u64 value) -{ - trace_mali_pm_power_on(event, value); -} - -void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id) -{ - trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id); -} - -void kbase_trace_mali_page_fault_insert_pages(int event, u32 value) -{ - trace_mali_page_fault_insert_pages(event, value); -} - -void kbase_trace_mali_mmu_as_in_use(int event) -{ - trace_mali_mmu_as_in_use(event); -} - -void kbase_trace_mali_mmu_as_released(int event) -{ - trace_mali_mmu_as_released(event); -} - -void kbase_trace_mali_total_alloc_pages_change(long long int event) -{ - trace_mali_total_alloc_pages_change(event); -} -#endif /* CONFIG_MALI_GATOR_SUPPORT */ -#ifdef CONFIG_MALI_SYSTEM_TRACE -#include "mali_linux_kbase_trace.h" -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_ctx_sched.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_ctx_sched.c deleted file mode 100755 index bda05602de5e..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_ctx_sched.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include - -#include "mali_kbase_ctx_sched.h" - -int kbase_ctx_sched_init(struct kbase_device *kbdev) -{ - int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; - - /* These two must be recalculated if nr_hw_address_spaces changes - * (e.g. 
for HW workarounds) */ - kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { - bool use_workaround; - - use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE; - if (use_workaround) { - dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only"); - kbdev->nr_user_address_spaces = 1; - } - } - - kbdev->as_free = as_present; /* All ASs initially free */ - - memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); - - return 0; -} - -void kbase_ctx_sched_term(struct kbase_device *kbdev) -{ - s8 i; - - /* Sanity checks */ - for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { - WARN_ON(kbdev->as_to_kctx[i] != NULL); - WARN_ON(!(kbdev->as_free & (1u << i))); - } -} - -/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space - * - * @kbdev: The context for which to find a free address space - * - * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID - * - * This function returns an address space available for use. It would prefer - * returning an AS that has been previously assigned to the context to - * avoid having to reprogram the MMU. - */ -static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) -{ - struct kbase_device *const kbdev = kctx->kbdev; - int free_as; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* First check if the previously assigned AS is available */ - if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && - (kbdev->as_free & (1u << kctx->as_nr))) - return kctx->as_nr; - - /* The previously assigned AS was taken, we'll be returning any free - * AS at this point. - */ - free_as = ffs(kbdev->as_free) - 1; - if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) - return free_as; - - return KBASEP_AS_NR_INVALID; -} - -int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) -{ - struct kbase_device *const kbdev = kctx->kbdev; - - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - WARN_ON(!kbdev->pm.backend.gpu_powered); - - if (atomic_inc_return(&kctx->refcount) == 1) { - int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); - - if (free_as != KBASEP_AS_NR_INVALID) { - kbdev->as_free &= ~(1u << free_as); - /* Only program the MMU if the context has not been - * assigned the same address space before. - */ - if (free_as != kctx->as_nr) { - struct kbase_context *const prev_kctx = - kbdev->as_to_kctx[free_as]; - - if (prev_kctx) { - WARN_ON(atomic_read(&prev_kctx->refcount) != 0); - kbase_mmu_disable(prev_kctx); - prev_kctx->as_nr = KBASEP_AS_NR_INVALID; - } - - kctx->as_nr = free_as; - kbdev->as_to_kctx[free_as] = kctx; - kbase_mmu_update(kbdev, &kctx->mmu, - kctx->as_nr); - } - } else { - atomic_dec(&kctx->refcount); - - /* Failed to find an available address space, we must - * be returning an error at this point. 
- */ - WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); - } - } - - return kctx->as_nr; -} - -void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) -{ - struct kbase_device *const kbdev = kctx->kbdev; - - lockdep_assert_held(&kbdev->hwaccess_lock); - WARN_ON(atomic_read(&kctx->refcount) == 0); - WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); - WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); - - atomic_inc(&kctx->refcount); -} - -void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) -{ - struct kbase_device *const kbdev = kctx->kbdev; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (atomic_dec_return(&kctx->refcount) == 0) - kbdev->as_free |= (1u << kctx->as_nr); -} - -void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) -{ - struct kbase_device *const kbdev = kctx->kbdev; - - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - WARN_ON(atomic_read(&kctx->refcount) != 0); - - if (kctx->as_nr != KBASEP_AS_NR_INVALID) { - if (kbdev->pm.backend.gpu_powered) - kbase_mmu_disable(kctx); - - kbdev->as_to_kctx[kctx->as_nr] = NULL; - kctx->as_nr = KBASEP_AS_NR_INVALID; - } -} - -void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) -{ - s8 i; - - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - WARN_ON(!kbdev->pm.backend.gpu_powered); - - for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { - struct kbase_context *kctx; - - kctx = kbdev->as_to_kctx[i]; - if (kctx) { - if (atomic_read(&kctx->refcount)) { - WARN_ON(kctx->as_nr != i); - - kbase_mmu_update(kbdev, &kctx->mmu, - kctx->as_nr); - } else { - /* This context might have been assigned an - * AS before, clear it. - */ - kbdev->as_to_kctx[kctx->as_nr] = NULL; - kctx->as_nr = KBASEP_AS_NR_INVALID; - } - } else { - kbase_mmu_disable_as(kbdev, i); - } - } -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_ctx_sched.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_ctx_sched.h deleted file mode 100755 index ab57a0dc1ca8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_ctx_sched.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_CTX_SCHED_H_ -#define _KBASE_CTX_SCHED_H_ - -#include - -/** - * The Context Scheduler manages address space assignment and reference - * counting to kbase_context. The interface has been designed to minimise - * interactions between the Job Scheduler and Power Management/MMU to support - * the existing Job Scheduler interface. - * - * The initial implementation of the Context Scheduler does not schedule - * contexts. Instead it relies on the Job Scheduler to make decisions of - * when to schedule/evict contexts if address spaces are starved. 
In the - * future, once an interface between the CS and JS has been devised to - * provide enough information about how each context is consuming GPU resources, - * those decisions can be made in the CS itself, thereby reducing duplicated - * code. - */ - -/** - * kbase_ctx_sched_init - Initialise the context scheduler - * @kbdev: The device for which the context scheduler needs to be initialised - * - * This must be called during device initialisation. The number of hardware - * address spaces must already be established before calling this function. - * - * Return: 0 for success, otherwise failure - */ -int kbase_ctx_sched_init(struct kbase_device *kbdev); - -/** - * kbase_ctx_sched_term - Terminate the context scheduler - * @kbdev: The device for which the context scheduler needs to be terminated - * - * This must be called during device termination after all contexts have been - * destroyed. - */ -void kbase_ctx_sched_term(struct kbase_device *kbdev); - -/** - * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context - * @kctx: The context to which to retain a reference - * - * This function should be called whenever an address space should be assigned - * to a context and programmed onto the MMU. It should typically be called - * when jobs are ready to be submitted to the GPU. - * - * It can be called as many times as necessary. The address space will be - * assigned to the context for as long as there is a reference to said context. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. - * - * Return: The address space that the context has been assigned to or - * KBASEP_AS_NR_INVALID if no address space was available. - */ -int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); - -/** - * kbase_ctx_sched_retain_ctx_refcount - * @kctx: The context to which to retain a reference - * - * This function only retains a reference to the context. It must be called - * only when the context already has a reference. - * - * This is typically called inside an atomic session where we know the context - * is already scheduled in but want to take an extra reference to ensure that - * it doesn't get descheduled. - * - * The kbase_device::hwaccess_lock must be held whilst calling this function - */ -void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); - -/** - * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context - * @kctx: The context from which to release a reference - * - * This function should be called whenever an address space could be unassigned - * from a context. When there are no more references to said context, the - * address space previously assigned to this context shall be reassigned to - * other contexts as needed. - * - * The kbase_device::hwaccess_lock must be held whilst calling this function - */ -void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); - -/** - * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space - * @kctx: The context to be removed - * - * This function should be called when a context is being destroyed. The - * context must no longer have any reference. If it has been assigned an - * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. 
- */ -void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); - -/** - * kbase_ctx_sched_restore_all_as - Reprogram all address spaces - * @kbdev: The device for which address spaces to be reprogrammed - * - * This function shall reprogram all address spaces previously assigned to - * contexts. It can be used after the GPU is reset. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. - */ -void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); - -#endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug.c deleted file mode 100755 index 118f787fb74c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include - -static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { - NULL, - NULL -}; - -void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) -{ - kbasep_debug_assert_registered_cb.func = func; - kbasep_debug_assert_registered_cb.param = param; -} - -void kbasep_debug_assert_call_hook(void) -{ - if (kbasep_debug_assert_registered_cb.func != NULL) - kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); -} -KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug.h deleted file mode 100755 index 2fdb72d943e4..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#ifndef _KBASE_DEBUG_H -#define _KBASE_DEBUG_H - -#include - -/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. 
*/ -#define KBASE_DEBUG_SKIP_TRACE 0 - -/** @brief If different from 0, the trace will only contain the file and line. */ -#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 - -/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ -#ifndef KBASE_DEBUG_DISABLE_ASSERTS -#ifdef CONFIG_MALI_DEBUG -#define KBASE_DEBUG_DISABLE_ASSERTS 0 -#else -#define KBASE_DEBUG_DISABLE_ASSERTS 1 -#endif -#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ - -/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ -typedef void (kbase_debug_assert_hook) (void *); - -struct kbasep_debug_assert_cb { - kbase_debug_assert_hook *func; - void *param; -}; - -/** - * @def KBASEP_DEBUG_PRINT_TRACE - * @brief Private macro containing the format of the trace to display before every message - * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME - */ -#if !KBASE_DEBUG_SKIP_TRACE -#define KBASEP_DEBUG_PRINT_TRACE \ - "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) -#if !KBASE_DEBUG_SKIP_FUNCTION_NAME -#define KBASEP_DEBUG_PRINT_FUNCTION __func__ -#else -#define KBASEP_DEBUG_PRINT_FUNCTION "" -#endif -#else -#define KBASEP_DEBUG_PRINT_TRACE "" -#endif - -/** - * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) - * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. - * @param trace location in the code from where the message is printed - * @param function function from where the message is printed - * @param ... Format string followed by format arguments. - * @note function parameter cannot be concatenated with other strings - */ -/* Select the correct system output function*/ -#ifdef CONFIG_MALI_DEBUG -#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ - do { \ - pr_err("Mali: %s function:%s ", trace, function);\ - pr_err(__VA_ARGS__);\ - pr_err("\n");\ - } while (false) -#else -#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() -#endif - -#ifdef CONFIG_MALI_DEBUG -#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() -#else -#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() -#endif - -/** - * @def KBASE_DEBUG_ASSERT(expr) - * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false - * - * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 - * - * @param expr Boolean expression - */ -#define KBASE_DEBUG_ASSERT(expr) \ - KBASE_DEBUG_ASSERT_MSG(expr, #expr) - -#if KBASE_DEBUG_DISABLE_ASSERTS -#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() -#else - /** - * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) - * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false - * - * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 - * - * @param expr Boolean expression - * @param ... Message to display when @a expr is false, as a format string followed by format arguments. - */ -#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ - do { \ - if (!(expr)) { \ - KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ - KBASE_CALL_ASSERT_HOOK();\ - BUG();\ - } \ - } while (false) -#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ - -/** - * @def KBASE_DEBUG_CODE( X ) - * @brief Executes the code inside the macro only in debug mode - * - * @param X Code to compile only in debug mode. 
- */ -#ifdef CONFIG_MALI_DEBUG -#define KBASE_DEBUG_CODE(X) X -#else -#define KBASE_DEBUG_CODE(X) CSTD_NOP() -#endif /* CONFIG_MALI_DEBUG */ - -/** @} */ - -/** - * @brief Register a function to call on ASSERT - * - * Such functions will \b only be called during Debug mode, and for debugging - * features \b only. Do not rely on them to be called in general use. - * - * To disable the hook, supply NULL to \a func. - * - * @note This function is not thread-safe, and should only be used to - * register/deregister once in the module's lifetime. - * - * @param[in] func the function to call when an assert is triggered. - * @param[in] param the parameter to pass to \a func when calling it - */ -void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); - -/** - * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() - * - * @note This function is not thread-safe with respect to multiple threads - * registering functions and parameters with - * kbase_debug_assert_register_hook(). Otherwise, thread safety is the - * responsibility of the registered hook. - */ -void kbasep_debug_assert_call_hook(void); - -#endif /* _KBASE_DEBUG_H */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_job_fault.c deleted file mode 100755 index 88bb0d38d5a8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_job_fault.c +++ /dev/null @@ -1,538 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include - -#ifdef CONFIG_DEBUG_FS - -static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) -{ - struct list_head *event_list = &kbdev->job_fault_event_list; - unsigned long flags; - bool ret; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - ret = !list_empty(event_list); - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - - return ret; -} - -static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) -{ - struct list_head *event_list = &kctx->kbdev->job_fault_event_list; - struct base_job_fault_event *event; - unsigned long flags; - - spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags); - list_for_each_entry(event, event_list, head) { - if (event->katom->kctx == kctx) { - list_del(&event->head); - spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); - - wake_up(&kctx->kbdev->job_fault_resume_wq); - flush_work(&event->job_fault_work); - - /* job_fault_event_list can only have a single atom for - * each context. 
- */ - return; - } - } - spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); -} - -static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - struct list_head *event_list = &kctx->kbdev->job_fault_event_list; - struct base_job_fault_event *event; - unsigned long flags; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - if (list_empty(event_list)) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - return true; - } - list_for_each_entry(event, event_list, head) { - if (event->katom->kctx == kctx) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, - flags); - return false; - } - } - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - return true; -} - -/* wait until the fault happen and copy the event */ -static int kbase_job_fault_event_wait(struct kbase_device *kbdev, - struct base_job_fault_event *event) -{ - struct list_head *event_list = &kbdev->job_fault_event_list; - struct base_job_fault_event *event_in; - unsigned long flags; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - while (list_empty(event_list)) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - if (wait_event_interruptible(kbdev->job_fault_wq, - kbase_is_job_fault_event_pending(kbdev))) - return -ERESTARTSYS; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - } - - event_in = list_entry(event_list->next, - struct base_job_fault_event, head); - event->event_code = event_in->event_code; - event->katom = event_in->katom; - - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - - return 0; - -} - -/* remove the event from the queue */ -static struct base_job_fault_event *kbase_job_fault_event_dequeue( - struct kbase_device *kbdev, struct list_head *event_list) -{ - struct base_job_fault_event *event; - - event = list_entry(event_list->next, - struct base_job_fault_event, head); - list_del(event_list->next); - - return event; - -} - -/* Remove all the following atoms after the failed atom in the same context - * Call the postponed bottom half of job done. - * Then, this context could be rescheduled. 
- */ -static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) -{ - struct list_head *event_list = &kctx->job_fault_resume_event_list; - - while (!list_empty(event_list)) { - struct base_job_fault_event *event; - - event = kbase_job_fault_event_dequeue(kctx->kbdev, - &kctx->job_fault_resume_event_list); - kbase_jd_done_worker(&event->katom->work); - } - -} - -/* Remove all the failed atoms that belong to different contexts - * Resume all the contexts that were suspend due to failed job - */ -static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) -{ - struct list_head *event_list = &kbdev->job_fault_event_list; - unsigned long flags; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - while (!list_empty(event_list)) { - kbase_job_fault_event_dequeue(kbdev, event_list); - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - wake_up(&kbdev->job_fault_resume_wq); - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - } - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); -} - -static void kbase_job_fault_resume_worker(struct work_struct *data) -{ - struct base_job_fault_event *event = container_of(data, - struct base_job_fault_event, job_fault_work); - struct kbase_context *kctx; - struct kbase_jd_atom *katom; - - katom = event->katom; - kctx = katom->kctx; - - dev_info(kctx->kbdev->dev, "Job dumping wait\n"); - - /* When it was waked up, it need to check if queue is empty or the - * failed atom belongs to different context. If yes, wake up. Both - * of them mean the failed job has been dumped. Please note, it - * should never happen that the job_fault_event_list has the two - * atoms belong to the same context. - */ - wait_event(kctx->kbdev->job_fault_resume_wq, - kbase_ctx_has_no_event_pending(kctx)); - - atomic_set(&kctx->job_fault_count, 0); - kbase_jd_done_worker(&katom->work); - - /* In case the following atoms were scheduled during failed job dump - * the job_done_worker was held. 
We need to rerun it after the dump - * was finished - */ - kbase_job_fault_resume_event_cleanup(kctx); - - dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); -} - -static struct base_job_fault_event *kbase_job_fault_event_queue( - struct list_head *event_list, - struct kbase_jd_atom *atom, - u32 completion_code) -{ - struct base_job_fault_event *event; - - event = &atom->fault_event; - - event->katom = atom; - event->event_code = completion_code; - - list_add_tail(&event->head, event_list); - - return event; - -} - -static void kbase_job_fault_event_post(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, u32 completion_code) -{ - struct base_job_fault_event *event; - unsigned long flags; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, - katom, completion_code); - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - - wake_up_interruptible(&kbdev->job_fault_wq); - - INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); - queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); - - dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", - katom->kctx->tgid, katom->kctx->id); - -} - -/* - * This function will process the job fault - * Get the register copy - * Send the failed job dump event - * Create a Wait queue to wait until the job dump finish - */ - -bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, - u32 completion_code) -{ - struct kbase_context *kctx = katom->kctx; - - /* Check if dumping is in the process - * only one atom of each context can be dumped at the same time - * If the atom belongs to different context, it can be dumped - */ - if (atomic_read(&kctx->job_fault_count) > 0) { - kbase_job_fault_event_queue( - &kctx->job_fault_resume_event_list, - katom, completion_code); - dev_info(kctx->kbdev->dev, "queue:%d\n", - kbase_jd_atom_id(kctx, katom)); - return true; - } - - if (kbase_ctx_flag(kctx, KCTX_DYING)) - return false; - - if (kctx->kbdev->job_fault_debug == true) { - - if (completion_code != BASE_JD_EVENT_DONE) { - - if (kbase_job_fault_get_reg_snapshot(kctx) == false) { - dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); - return false; - } - - kbase_job_fault_event_post(kctx->kbdev, katom, - completion_code); - atomic_inc(&kctx->job_fault_count); - dev_info(kctx->kbdev->dev, "post:%d\n", - kbase_jd_atom_id(kctx, katom)); - return true; - - } - } - return false; - -} - -static int debug_job_fault_show(struct seq_file *m, void *v) -{ - struct kbase_device *kbdev = m->private; - struct base_job_fault_event *event = (struct base_job_fault_event *)v; - struct kbase_context *kctx = event->katom->kctx; - int i; - - dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", - kctx->tgid, kctx->id, event->reg_offset); - - if (kctx->reg_dump == NULL) { - dev_warn(kbdev->dev, "reg dump is NULL"); - return -1; - } - - if (kctx->reg_dump[event->reg_offset] == - REGISTER_DUMP_TERMINATION_FLAG) { - /* Return the error here to stop the read. And the - * following next() will not be called. 
The stop can - * get the real event resource and release it - */ - return -1; - } - - if (event->reg_offset == 0) - seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); - - for (i = 0; i < 50; i++) { - if (kctx->reg_dump[event->reg_offset] == - REGISTER_DUMP_TERMINATION_FLAG) { - break; - } - seq_printf(m, "%08x: %08x\n", - kctx->reg_dump[event->reg_offset], - kctx->reg_dump[1+event->reg_offset]); - event->reg_offset += 2; - - } - - - return 0; -} -static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct kbase_device *kbdev = m->private; - struct base_job_fault_event *event = (struct base_job_fault_event *)v; - - dev_info(kbdev->dev, "debug job fault seq next:%d, %d", - event->reg_offset, (int)*pos); - - return event; -} - -static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) -{ - struct kbase_device *kbdev = m->private; - struct base_job_fault_event *event; - - dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); - - /* The condition is trick here. It needs make sure the - * fault hasn't happened and the dumping hasn't been started, - * or the dumping has finished - */ - if (*pos == 0) { - event = kmalloc(sizeof(*event), GFP_KERNEL); - if (!event) - return NULL; - event->reg_offset = 0; - if (kbase_job_fault_event_wait(kbdev, event)) { - kfree(event); - return NULL; - } - - /* The cache flush workaround is called in bottom half of - * job done but we delayed it. Now we should clean cache - * earlier. Then the GPU memory dump should be correct. - */ - kbase_backend_cache_clean(kbdev, event->katom); - } else - return NULL; - - return event; -} - -static void debug_job_fault_stop(struct seq_file *m, void *v) -{ - struct kbase_device *kbdev = m->private; - - /* here we wake up the kbase_jd_done_worker after stop, it needs - * get the memory dump before the register dump in debug daemon, - * otherwise, the memory dump may be incorrect. - */ - - if (v != NULL) { - kfree(v); - dev_info(kbdev->dev, "debug job fault seq stop stage 1"); - - } else { - unsigned long flags; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - if (!list_empty(&kbdev->job_fault_event_list)) { - kbase_job_fault_event_dequeue(kbdev, - &kbdev->job_fault_event_list); - wake_up(&kbdev->job_fault_resume_wq); - } - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - dev_info(kbdev->dev, "debug job fault seq stop stage 2"); - } - -} - -static const struct seq_operations ops = { - .start = debug_job_fault_start, - .next = debug_job_fault_next, - .stop = debug_job_fault_stop, - .show = debug_job_fault_show, -}; - -static int debug_job_fault_open(struct inode *in, struct file *file) -{ - struct kbase_device *kbdev = in->i_private; - - seq_open(file, &ops); - - ((struct seq_file *)file->private_data)->private = kbdev; - dev_info(kbdev->dev, "debug job fault seq open"); - - kbdev->job_fault_debug = true; - - return 0; - -} - -static int debug_job_fault_release(struct inode *in, struct file *file) -{ - struct kbase_device *kbdev = in->i_private; - - seq_release(in, file); - - kbdev->job_fault_debug = false; - - /* Clean the unprocessed job fault. After that, all the suspended - * contexts could be rescheduled. 
- */ - kbase_job_fault_event_cleanup(kbdev); - - dev_info(kbdev->dev, "debug job fault seq close"); - - return 0; -} - -static const struct file_operations kbasep_debug_job_fault_fops = { - .open = debug_job_fault_open, - .read = seq_read, - .llseek = seq_lseek, - .release = debug_job_fault_release, -}; - -/* - * Initialize debugfs entry for job fault dump - */ -void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("job_fault", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_debug_job_fault_fops); -} - - -int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) -{ - - INIT_LIST_HEAD(&kbdev->job_fault_event_list); - - init_waitqueue_head(&(kbdev->job_fault_wq)); - init_waitqueue_head(&(kbdev->job_fault_resume_wq)); - spin_lock_init(&kbdev->job_fault_event_lock); - - kbdev->job_fault_resume_workq = alloc_workqueue( - "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); - if (!kbdev->job_fault_resume_workq) - return -ENOMEM; - - kbdev->job_fault_debug = false; - - return 0; -} - -/* - * Release the relevant resource per device - */ -void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) -{ - destroy_workqueue(kbdev->job_fault_resume_workq); -} - - -/* - * Initialize the relevant data structure per context - */ -void kbase_debug_job_fault_context_init(struct kbase_context *kctx) -{ - - /* We need allocate double size register range - * Because this memory will keep the register address and value - */ - kctx->reg_dump = vmalloc(0x4000 * 2); - if (kctx->reg_dump == NULL) - return; - - if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { - vfree(kctx->reg_dump); - kctx->reg_dump = NULL; - } - INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); - atomic_set(&kctx->job_fault_count, 0); - -} - -/* - * release the relevant resource per context - */ -void kbase_debug_job_fault_context_term(struct kbase_context *kctx) -{ - vfree(kctx->reg_dump); -} - -void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) -{ - WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); - - kbase_ctx_remove_pending_event(kctx); -} - -#else /* CONFIG_DEBUG_FS */ - -int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) -{ - kbdev->job_fault_debug = false; - - return 0; -} - -void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) -{ -} - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_job_fault.h deleted file mode 100755 index ef69627cdce8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_job_fault.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_DEBUG_JOB_FAULT_H -#define _KBASE_DEBUG_JOB_FAULT_H - -#include -#include - -#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF - -/** - * kbase_debug_job_fault_dev_init - Create the fault event wait queue - * per device and initialize the required lists. - * @kbdev: Device pointer - * - * Return: Zero on success or a negative error code. - */ -int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); - -/** - * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs - * @kbdev: Device pointer - */ -void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); - -/** - * kbase_debug_job_fault_dev_term - Clean up resources created in - * kbase_debug_job_fault_dev_init. - * @kbdev: Device pointer - */ -void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); - -/** - * kbase_debug_job_fault_context_init - Initialize the relevant - * data structure per context - * @kctx: KBase context pointer - */ -void kbase_debug_job_fault_context_init(struct kbase_context *kctx); - -/** - * kbase_debug_job_fault_context_term - Release the relevant - * resource per context - * @kctx: KBase context pointer - */ -void kbase_debug_job_fault_context_term(struct kbase_context *kctx); - -/** - * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault - * dumping on context termination. - * - * This function is called during context termination to unblock the atom for - * which the job fault occurred and also the atoms following it. This is needed - * otherwise the wait for zero jobs could timeout (leading to an assertion - * failure, kernel panic in debug builds) in the pathological case where - * although the thread/daemon capturing the job fault events is running, - * but for some reasons has stopped consuming the events. - * - * @kctx: KBase context pointer - */ -void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); - -/** - * kbase_debug_job_fault_process - Process the failed job. 
- * It will send a event and wake up the job fault waiting queue - * Then create a work queue to wait for job dump finish - * This function should be called in the interrupt handler and before - * jd_done that make sure the jd_done_worker will be delayed until the - * job dump finish - * @katom: The failed atom pointer - * @completion_code: the job status - * @return true if dump is going on - */ -bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, - u32 completion_code); - - -/** - * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers - * address during the job fault process, the relevant registers will - * be saved when a job fault happen - * @kctx: KBase context pointer - * @reg_range: Maximum register address space - * @return true if initializing successfully - */ -bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, - int reg_range); - -/** - * kbase_job_fault_get_reg_snapshot - Read the interested registers for - * failed job dump - * @kctx: KBase context pointer - * @return true if getting registers successfully - */ -bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); - -#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_mem_view.c deleted file mode 100755 index 8f46117ab9db..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_mem_view.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Debugfs interface to dump the memory visible to the GPU - */ - -#include "mali_kbase_debug_mem_view.h" -#include "mali_kbase.h" - -#include -#include - -#ifdef CONFIG_DEBUG_FS - -#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE) -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) -#endif - -struct debug_mem_mapping { - struct list_head node; - - struct kbase_mem_phy_alloc *alloc; - unsigned long flags; - - u64 start_pfn; - size_t nr_pages; -}; - -struct debug_mem_data { - struct list_head mapping_list; - struct kbase_context *kctx; -}; - -struct debug_mem_seq_off { - struct list_head *lh; - size_t offset; -}; - -static void *debug_mem_start(struct seq_file *m, loff_t *_pos) -{ - struct debug_mem_data *mem_data = m->private; - struct debug_mem_seq_off *data; - struct debug_mem_mapping *map; - loff_t pos = *_pos; - - list_for_each_entry(map, &mem_data->mapping_list, node) { - if (pos >= map->nr_pages) { - pos -= map->nr_pages; - } else { - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return NULL; - data->lh = &map->node; - data->offset = pos; - return data; - } - } - - /* Beyond the end */ - return NULL; -} - -static void debug_mem_stop(struct seq_file *m, void *v) -{ - kfree(v); -} - -static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct debug_mem_data *mem_data = m->private; - struct debug_mem_seq_off *data = v; - struct debug_mem_mapping *map; - - map = list_entry(data->lh, struct debug_mem_mapping, node); - - if (data->offset < map->nr_pages - 1) { - data->offset++; - ++*pos; - return data; - } - - if (list_is_last(data->lh, &mem_data->mapping_list)) { - kfree(data); - return NULL; - } - - data->lh = data->lh->next; - data->offset = 0; - ++*pos; - - return data; -} - -static int debug_mem_show(struct seq_file *m, void *v) -{ - struct debug_mem_data *mem_data = m->private; - struct debug_mem_seq_off *data = v; - struct debug_mem_mapping *map; - int i, j; - struct page *page; - uint32_t *mapping; - pgprot_t prot = PAGE_KERNEL; - - map = list_entry(data->lh, struct debug_mem_mapping, node); - - kbase_gpu_vm_lock(mem_data->kctx); - - if (data->offset >= map->alloc->nents) { - seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + - data->offset) << PAGE_SHIFT); - goto out; - } - - if (!(map->flags & KBASE_REG_CPU_CACHED)) - prot = pgprot_writecombine(prot); - - page = as_page(map->alloc->pages[data->offset]); - mapping = vmap(&page, 1, VM_MAP, prot); - if (!mapping) - goto out; - - for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { - seq_printf(m, "%016llx:", i + ((map->start_pfn + - data->offset) << PAGE_SHIFT)); - - for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping)) - seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]); - seq_putc(m, '\n'); - } - - vunmap(mapping); - - seq_putc(m, '\n'); - -out: - kbase_gpu_vm_unlock(mem_data->kctx); - return 0; -} - -static const struct seq_operations ops = { - .start = debug_mem_start, - .next = debug_mem_next, - .stop = debug_mem_stop, - .show = debug_mem_show, -}; - -static int debug_mem_zone_open(struct rb_root *rbtree, - struct debug_mem_data *mem_data) -{ - int ret = 0; - struct rb_node *p; - struct kbase_va_region *reg; - struct debug_mem_mapping *mapping; - - for (p = rb_first(rbtree); p; p = rb_next(p)) { - reg = rb_entry(p, struct kbase_va_region, rblink); - - if (reg->gpu_alloc == NULL) - /* Empty region - ignore */ - continue; - - mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); - if (!mapping) { - ret = 
-ENOMEM; - goto out; - } - - mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - mapping->start_pfn = reg->start_pfn; - mapping->nr_pages = reg->nr_pages; - mapping->flags = reg->flags; - list_add_tail(&mapping->node, &mem_data->mapping_list); - } - -out: - return ret; -} - -static int debug_mem_open(struct inode *i, struct file *file) -{ - struct file *kctx_file = i->i_private; - struct kbase_context *kctx = kctx_file->private_data; - struct debug_mem_data *mem_data; - int ret; - - if (get_file_rcu(kctx_file) == 0) - return -ENOENT; - - ret = seq_open(file, &ops); - if (ret) - goto open_fail; - - mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); - if (!mem_data) { - ret = -ENOMEM; - goto out; - } - - mem_data->kctx = kctx; - - INIT_LIST_HEAD(&mem_data->mapping_list); - - kbase_gpu_vm_lock(kctx); - - ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); - if (0 != ret) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); - if (0 != ret) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - kbase_gpu_vm_unlock(kctx); - - ((struct seq_file *)file->private_data)->private = mem_data; - - return 0; - -out: - if (mem_data) { - while (!list_empty(&mem_data->mapping_list)) { - struct debug_mem_mapping *mapping; - - mapping = list_first_entry(&mem_data->mapping_list, - struct debug_mem_mapping, node); - kbase_mem_phy_alloc_put(mapping->alloc); - list_del(&mapping->node); - kfree(mapping); - } - kfree(mem_data); - } - seq_release(i, file); -open_fail: - fput(kctx_file); - - return ret; -} - -static int debug_mem_release(struct inode *inode, struct file *file) -{ - struct file *kctx_file = inode->i_private; - struct seq_file *sfile = file->private_data; - struct debug_mem_data *mem_data = sfile->private; - struct debug_mem_mapping *mapping; - - seq_release(inode, file); - - while (!list_empty(&mem_data->mapping_list)) { - mapping = list_first_entry(&mem_data->mapping_list, - struct debug_mem_mapping, node); - kbase_mem_phy_alloc_put(mapping->alloc); - list_del(&mapping->node); - kfree(mapping); - } - - kfree(mem_data); - - fput(kctx_file); - - return 0; -} - -static const struct file_operations kbase_debug_mem_view_fops = { - .open = debug_mem_open, - .release = debug_mem_release, - .read = seq_read, - .llseek = seq_lseek -}; - -/** - * kbase_debug_mem_view_init - Initialise the mem_view sysfs file - * @kctx_file: The /dev/mali0 file instance for the context - * - * This function creates a "mem_view" file which can be used to get a view of - * the context's memory as the GPU sees it (i.e. using the GPU's page tables). - * - * The file is cleaned up by a call to debugfs_remove_recursive() deleting the - * parent directory. - */ -void kbase_debug_mem_view_init(struct file *kctx_file) -{ - struct kbase_context *kctx = kctx_file->private_data; - - debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file, - &kbase_debug_mem_view_fops); -} - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_mem_view.h deleted file mode 100755 index 886ca9448cf5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_debug_mem_view.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * - * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_DEBUG_MEM_VIEW_H -#define _KBASE_DEBUG_MEM_VIEW_H - -#include - -void kbase_debug_mem_view_init(struct file *kctx_file); - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_defs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_defs.h deleted file mode 100755 index a135742ee980..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_defs.h +++ /dev/null @@ -1,2223 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_defs.h - * - * Defintions (types, defines, etcs) common to Kbase. They are placed here to - * allow the hierarchy of header files to work. - */ - -#ifndef _KBASE_DEFS_H_ -#define _KBASE_DEFS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include -#include -#include - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -#include -#endif - -#if defined(CONFIG_SYNC) -#include -#else -#include "mali_kbase_fence_defs.h" -#endif - -#ifdef CONFIG_DEBUG_FS -#include -#endif /* CONFIG_DEBUG_FS */ - -#ifdef CONFIG_MALI_DEVFREQ -#include -#endif /* CONFIG_MALI_DEVFREQ */ - -#include -#include - -#if defined(CONFIG_PM_RUNTIME) || \ - (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) -#define KBASE_PM_RUNTIME 1 -#endif - -/** Enable SW tracing when set */ -#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE -#define KBASE_TRACE_ENABLE 1 -#endif - -#ifndef KBASE_TRACE_ENABLE -#ifdef CONFIG_MALI_DEBUG -#define KBASE_TRACE_ENABLE 1 -#else -#define KBASE_TRACE_ENABLE 0 -#endif /* CONFIG_MALI_DEBUG */ -#endif /* KBASE_TRACE_ENABLE */ - -/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */ -#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1 - -/** - * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware. 
- * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU - * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware - * before resetting. - */ -#define ZAP_TIMEOUT 1000 - -/** Number of milliseconds before we time out on a GPU soft/hard reset */ -#define RESET_TIMEOUT 500 - -/** - * Prevent soft-stops from occuring in scheduling situations - * - * This is not due to HW issues, but when scheduling is desired to be more predictable. - * - * Therefore, soft stop may still be disabled due to HW issues. - * - * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context. - * - * @note if not in use, define this value to 0 instead of \#undef'ing it - */ -#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 - -/** - * Prevent hard-stops from occuring in scheduling situations - * - * This is not due to HW issues, but when scheduling is desired to be more predictable. - * - * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context. - * - * @note if not in use, define this value to 0 instead of \#undef'ing it - */ -#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 - -/** - * The maximum number of Job Slots to support in the Hardware. - * - * You can optimize this down if your target devices will only ever support a - * small number of job slots. - */ -#define BASE_JM_MAX_NR_SLOTS 3 - -/** - * The maximum number of Address Spaces to support in the Hardware. - * - * You can optimize this down if your target devices will only ever support a - * small number of Address Spaces - */ -#define BASE_MAX_NR_AS 16 - -/* mmu */ -#define MIDGARD_MMU_LEVEL(x) (x) - -#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) - -#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) - -#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) - -/** setting in kbase_context::as_nr that indicates it's invalid */ -#define KBASEP_AS_NR_INVALID (-1) - -#define KBASE_LOCK_REGION_MAX_SIZE (63) -#define KBASE_LOCK_REGION_MIN_SIZE (11) - -#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */ -#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2) -#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1) - -#include "mali_kbase_js_defs.h" -#include "mali_kbase_hwaccess_defs.h" - -#define KBASEP_FORCE_REPLAY_DISABLED 0 - -/* Maximum force replay limit when randomization is enabled */ -#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16 - -/* Maximum number of pages of memory that require a permanent mapping, per - * kbase_context - */ -#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \ - PAGE_SHIFT) - -/** Atom has been previously soft-stoppped */ -#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1) -/** Atom has been previously retried to execute */ -#define KBASE_KATOM_FLAGS_RERUN (1<<2) -/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps to - * disambiguate short-running job chains during soft/hard stopping of jobs - */ -#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) -/** Atom has been previously hard-stopped. 
*/ -#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) -/** Atom has caused us to enter disjoint state */ -#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) -/* Atom blocked on cross-slot dependency */ -#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) -/* Atom has fail dependency on cross-slot dependency */ -#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) -/* Atom is currently holding a context reference */ -#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -/* Atom requires GPU to be in protected mode */ -#define KBASE_KATOM_FLAG_PROTECTED (1<<11) -/* Atom has been stored in runnable_tree */ -#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) -/* Atom is waiting for L2 caches to power up in order to enter protected mode */ -#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) - -/* SW related flags about types of JS_COMMAND action - * NOTE: These must be masked off by JS_COMMAND_MASK */ - -/** This command causes a disjoint event */ -#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 - -/** Bitmask of all SW related flags */ -#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) - -#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) -#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks -#endif - -/** Soft-stop command that causes a Disjoint event. This of course isn't - * entirely masked off by JS_COMMAND_MASK */ -#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ - (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) - -#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT - -/* Serialize atoms within a slot (ie only one atom per job slot) */ -#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) -/* Serialize atoms between slots (ie only one job slot running at any time) */ -#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) -/* Reset the GPU after each atom completion */ -#define KBASE_SERIALIZE_RESET (1 << 2) - -/* Forward declarations */ -struct kbase_context; -struct kbase_device; -struct kbase_as; -struct kbase_mmu_setup; -struct kbase_ipa_model_vinstr_data; - -#ifdef CONFIG_DEBUG_FS -/** - * struct base_job_fault_event - keeps track of the atom which faulted or which - * completed after the faulty atom but before the - * debug data for faulty atom was dumped. - * - * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for the - * atom which faulted. - * @katom: pointer to the atom for which job fault occurred or which completed - * after the faulty atom. - * @job_fault_work: work item, queued only for the faulty atom, which waits for - * the dumping to get completed and then does the bottom half - * of job done for the atoms which followed the faulty atom. - * @head: List head used to store the atom in the global list of faulty - * atoms or context specific list of atoms which got completed - * during the dump. - * @reg_offset: offset of the register to be dumped next, only applicable for - * the faulty atom. - */ -struct base_job_fault_event { - - u32 event_code; - struct kbase_jd_atom *katom; - struct work_struct job_fault_work; - struct list_head head; - int reg_offset; -}; - -#endif - -/** - * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. - * @atom: pointer to the dependee atom. - * @dep_type: type of dependency on the dependee @atom, i.e. order or data - * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. 
- */ -struct kbase_jd_atom_dependency { - struct kbase_jd_atom *atom; - u8 dep_type; -}; - -/** - * struct kbase_io_access - holds information about 1 register access - * - * @addr: first bit indicates r/w (r=0, w=1) - * @value: value written or read - */ -struct kbase_io_access { - uintptr_t addr; - u32 value; -}; - -/** - * struct kbase_io_history - keeps track of all recent register accesses - * - * @enabled: true if register accesses are recorded, false otherwise - * @lock: spinlock protecting kbase_io_access array - * @count: number of registers read/written - * @size: number of elements in kbase_io_access array - * @buf: array of kbase_io_access - */ -struct kbase_io_history { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - bool enabled; -#else - u32 enabled; -#endif - - spinlock_t lock; - size_t count; - u16 size; - struct kbase_io_access *buf; -}; - -/** - * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the - * dependee atom. - * @dep: pointer to the dependency info structure. - * - * Return: readonly reference to dependee atom. - */ -static inline const struct kbase_jd_atom * -kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) -{ - LOCAL_ASSERT(dep != NULL); - - return (const struct kbase_jd_atom *)(dep->atom); -} - -/** - * kbase_jd_katom_dep_type - Retrieves the dependency type info - * - * @dep: pointer to the dependency info structure. - * - * Return: the type of dependency there is on the dependee atom. - */ -static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) -{ - LOCAL_ASSERT(dep != NULL); - - return dep->dep_type; -} - -/** - * kbase_jd_katom_dep_set - sets up the dependency info structure - * as per the values passed. - * @const_dep: pointer to the dependency info structure to be setup. - * @a: pointer to the dependee atom. - * @type: type of dependency there is on the dependee atom. - */ -static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, - struct kbase_jd_atom *a, u8 type) -{ - struct kbase_jd_atom_dependency *dep; - - LOCAL_ASSERT(const_dep != NULL); - - dep = (struct kbase_jd_atom_dependency *)const_dep; - - dep->atom = a; - dep->dep_type = type; -} - -/** - * kbase_jd_katom_dep_clear - resets the dependency info structure - * - * @const_dep: pointer to the dependency info structure to be setup. - */ -static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) -{ - struct kbase_jd_atom_dependency *dep; - - LOCAL_ASSERT(const_dep != NULL); - - dep = (struct kbase_jd_atom_dependency *)const_dep; - - dep->atom = NULL; - dep->dep_type = BASE_JD_DEP_TYPE_INVALID; -} - -/** - * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it becomes - * runnable, with respect to job slot ringbuffer/fifo. - * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, which - * implies that either atom has not become runnable - * due to dependency or has completed the execution - * on GPU. - * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is blocked - * due to cross slot dependency, can't be submitted to GPU. - * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot fifo but - * is waiting for the completion of previously added atoms - * in current & other slots, as their protected mode - * requirements do not match with the current atom. 
- * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo and is - * waiting for completion of protected mode transition, - * needed before the atom is submitted to GPU. - * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is waiting - * for the cores, which are needed to execute the job - * chain represented by the atom, to become available - * @KBASE_ATOM_GPU_RB_WAITING_AFFINITY: Atom is in slot fifo but is blocked on - * affinity due to rmu workaround for Hw issue 8987. - * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to GPU. - * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted to GPU. - * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some failure, - * but only after the previously added atoms in fifo - * have completed or have also been returned to JS. - */ -enum kbase_atom_gpu_rb_state { - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, - KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, - KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - KBASE_ATOM_GPU_RB_READY, - KBASE_ATOM_GPU_RB_SUBMITTED, - KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 -}; - -/** - * enum kbase_atom_enter_protected_state - The state of an atom with respect to the - * preparation for GPU's entry into protected mode, becomes - * pertinent only after atom's state with respect to slot - * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any atoms - * currently submitted to GPU and protected mode transition is - * not already in progress. - * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to - * become disabled before entry into protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation - * for the coherency change. L2 shall be powered down and GPU shall - * come out of fully coherent mode before entering protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; - * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on so that - * coherency register contains correct value when GPU enters - * protected mode. - * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for BASE_HW_ISSUE_TGOX_R1_1234 check - * that L2 is powered up and switch GPU to protected mode. - */ -enum kbase_atom_enter_protected_state { - /** - * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK. - */ - KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, - KBASE_ATOM_ENTER_PROTECTED_HWCNT, - KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, - KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, - KBASE_ATOM_ENTER_PROTECTED_FINISHED, -}; - -/** - * enum kbase_atom_exit_protected_state - The state of an atom with respect to the - * preparation for GPU's exit from protected mode, becomes - * pertinent only after atom's state with respect to slot - * ringbuffer is KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION - * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any atoms - * currently submitted to GPU and protected mode transition is - * not already in progress. - * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation - * for the reset, as exiting protected mode requires a reset. 
- * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from protected mode - * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to complete - */ -enum kbase_atom_exit_protected_state { - /** - * NOTE: The integer value of this must match KBASE_ATOM_ENTER_PROTECTED_CHECK. - */ - KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, - KBASE_ATOM_EXIT_PROTECTED_RESET, - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, -}; - -/** - * struct kbase_ext_res - Contains the info for external resources referred - * by an atom, which have been mapped on GPU side. - * @gpu_address: Start address of the memory region allocated for - * the resource from GPU virtual address space. - * @alloc: pointer to physical pages tracking object, set on - * mapping the external resource on GPU side. - */ -struct kbase_ext_res { - u64 gpu_address; - struct kbase_mem_phy_alloc *alloc; -}; - -/** - * struct kbase_jd_atom - object representing the atom, containing the complete - * state and attributes of an atom. - * @work: work item for the bottom half processing of the atom, - * by JD or JS, after it got executed on GPU or the input - * fence got signaled - * @start_timestamp: time at which the atom was submitted to the GPU, by - * updating the JS_HEAD_NEXTn register. - * @udata: copy of the user data sent for the atom in base_jd_submit. - * @kctx: Pointer to the base context with which the atom is associated. - * @dep_head: Array of 2 list heads, pointing to the two list of atoms - * which are blocked due to dependency on this atom. - * @dep_item: Array of 2 list heads, used to store the atom in the list of - * other atoms depending on the same dependee atom. - * @dep: Array containing the dependency info for the 2 atoms on which - * the atom depends upon. - * @jd_item: List head used during job dispatch job_done processing - as - * dependencies may not be entirely resolved at this point, - * we need to use a separate list head. - * @in_jd_list: flag set to true if atom's @jd_item is currently on a list, - * prevents atom being processed twice. - * @nr_extres: number of external resources referenced by the atom. - * @extres: pointer to the location containing info about @nr_extres - * external resources referenced by the atom. - * @device_nr: indicates the coregroup with which the atom is associated, - * when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. - * @jc: GPU address of the job-chain. - * @softjob_data: Copy of data read from the user space buffer that @jc - * points to. - * @fence: Stores either an input or output sync fence, depending - * on soft-job type - * @sync_waiter: Pointer to the sync fence waiter structure passed to the - * callback function on signaling of the input fence. - * @dma_fence: object containing pointers to both input & output fences - * and other related members used for explicit sync through - * soft jobs and for the implicit synchronization required - * on access to external resources. - * @event_code: Event code for the job chain represented by the atom, both - * HW and low-level SW events are represented by event codes. - * @core_req: bitmask of BASE_JD_REQ_* flags specifying either Hw or Sw - * requirements for the job chain represented by the atom. - * @ticks: Number of scheduling ticks for which atom has been running - * on the GPU. - * @sched_priority: Priority of the atom for Job scheduling, as per the - * KBASE_JS_ATOM_SCHED_PRIO_*. 
- * @poking: Indicates whether poking of MMU is ongoing for the atom, - * as a WA for the issue HW_ISSUE_8316. - * @completed: Wait queue to wait upon for the completion of atom. - * @status: Indicates at high level at what stage the atom is in, - * as per KBASE_JD_ATOM_STATE_*, that whether it is not in - * use or its queued in JD or given to JS or submitted to Hw - * or it completed the execution on Hw. - * @work_id: used for GPU tracepoints, its a snapshot of the 'work_id' - * counter in kbase_jd_context which is incremented on - * every call to base_jd_submit. - * @slot_nr: Job slot chosen for the atom. - * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the exact - * low level state of the atom. - * @retry_count: Number of times this atom has been retried. Used by replay - * soft job. - * @gpu_rb_state: bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking - * atom's state after it has entered Job scheduler on becoming - * runnable. Atom could be blocked due to cross slot dependency - * or waiting for the shader cores to become available or - * waiting for protected mode transitions to complete. - * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU - * cache is needed for the atom and the shader cores used - * for atom have been kept on. - * @blocked: flag indicating that atom's resubmission to GPU is - * blocked till the work item is scheduled to return the - * atom to JS. - * @pre_dep: Pointer to atom that this atom has same-slot dependency on - * @post_dep: Pointer to atom that has same-slot dependency on this atom - * @x_pre_dep: Pointer to atom that this atom has cross-slot dependency on - * @x_post_dep: Pointer to atom that has cross-slot dependency on this atom - * @flush_id: The GPU's flush count recorded at the time of submission, - * used for the cache flush optimisation - * @fault_event: Info for dumping the debug data on Job fault. - * @queue: List head used for 4 different purposes : - * Adds atom to the list of dma-buf fence waiting atoms. - * Adds atom to the list of atoms blocked due to cross - * slot dependency. - * Adds atom to the list of softjob atoms for which JIT - * allocation has been deferred - * Adds atom to the list of softjob atoms waiting for the - * signaling of fence. - * @jit_node: Used to keep track of all JIT free/alloc jobs in submission order - * @jit_blocked: Flag indicating that JIT allocation requested through - * softjob atom will be reattempted after the impending - * free of other active JIT allocations. - * @will_fail_event_code: If non-zero, this indicates that the atom will fail - * with the set event_code when the atom is processed. - * Used for special handling of atoms, which have a data - * dependency on the failed atoms. - * @protected_state: State of the atom, as per KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, - * when transitioning into or out of protected mode. Atom will - * be either entering or exiting the protected mode. - * @runnable_tree_node: The node added to context's job slot specific rb tree - * when the atom becomes runnable. - * @age: Age of atom relative to other atoms in the context, is - * snapshot of the age_count counter in kbase context. 
- */ -struct kbase_jd_atom { - struct work_struct work; - ktime_t start_timestamp; - - struct base_jd_udata udata; - struct kbase_context *kctx; - - struct list_head dep_head[2]; - struct list_head dep_item[2]; - const struct kbase_jd_atom_dependency dep[2]; - struct list_head jd_item; - bool in_jd_list; - - u16 nr_extres; - struct kbase_ext_res *extres; - - u32 device_nr; - u64 jc; - void *softjob_data; -#if defined(CONFIG_SYNC) - struct sync_fence *fence; - struct sync_fence_waiter sync_waiter; -#endif /* CONFIG_SYNC */ -#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - struct { - /* Use the functions/API defined in mali_kbase_fence.h to - * when working with this sub struct */ -#if defined(CONFIG_SYNC_FILE) - /* Input fence */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence_in; -#else - struct dma_fence *fence_in; -#endif -#endif - /* This points to the dma-buf output fence for this atom. If - * this is NULL then there is no fence for this atom and the - * following fields related to dma_fence may have invalid data. - * - * The context and seqno fields contain the details for this - * fence. - * - * This fence is signaled when the katom is completed, - * regardless of the event_code of the katom (signal also on - * failure). - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - /* The dma-buf fence context number for this atom. A unique - * context number is allocated to each katom in the context on - * context creation. - */ - unsigned int context; - /* The dma-buf fence sequence number for this atom. This is - * increased every time this katom uses dma-buf fence. - */ - atomic_t seqno; - /* This contains a list of all callbacks set up to wait on - * other fences. This atom must be held back from JS until all - * these callbacks have been called and dep_count have reached - * 0. The initial value of dep_count must be equal to the - * number of callbacks on this list. - * - * This list is protected by jctx.lock. Callbacks are added to - * this list when the atom is built and the wait are set up. - * All the callbacks then stay on the list until all callbacks - * have been called and the atom is queued, or cancelled, and - * then all callbacks are taken off the list and freed. - */ - struct list_head callbacks; - /* Atomic counter of number of outstandind dma-buf fence - * dependencies for this atom. When dep_count reaches 0 the - * atom may be queued. - * - * The special value "-1" may only be set after the count - * reaches 0, while holding jctx.lock. This indicates that the - * atom has been handled, either queued in JS or cancelled. - * - * If anyone but the dma-fence worker sets this to -1 they must - * ensure that any potentially queued worker must have - * completed before allowing the atom to be marked as unused. - * This can be done by flushing the fence work queue: - * kctx->dma_fence.wq. 
- */ - atomic_t dep_count; - } dma_fence; -#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/ - - /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ - enum base_jd_event_code event_code; - base_jd_core_req core_req; - - u32 ticks; - int sched_priority; - - int poking; - - wait_queue_head_t completed; - enum kbase_jd_atom_state status; -#ifdef CONFIG_GPU_TRACEPOINTS - int work_id; -#endif - int slot_nr; - - u32 atom_flags; - - int retry_count; - - enum kbase_atom_gpu_rb_state gpu_rb_state; - - bool need_cache_flush_cores_retained; - - atomic_t blocked; - - struct kbase_jd_atom *pre_dep; - struct kbase_jd_atom *post_dep; - - struct kbase_jd_atom *x_pre_dep; - struct kbase_jd_atom *x_post_dep; - - u32 flush_id; - -#ifdef CONFIG_DEBUG_FS - struct base_job_fault_event fault_event; -#endif - - struct list_head queue; - - struct list_head jit_node; - bool jit_blocked; - - enum base_jd_event_code will_fail_event_code; - - union { - enum kbase_atom_enter_protected_state enter; - enum kbase_atom_exit_protected_state exit; - } protected_state; - - struct rb_node runnable_tree_node; - - u32 age; -}; - -/** - * struct kbase_debug_copy_buffer - information about the buffer to be copied. - * - * @size: size of the buffer in bytes - * @pages: pointer to an array of pointers to the pages which contain - * the buffer - * @is_vmalloc: true if @pages was allocated with vzalloc. false if @pages was - * allocated with kcalloc - * @nr_pages: number of pages - * @offset: offset into the pages - * @gpu_alloc: pointer to physical memory allocated by the GPU - * @extres_pages: array of pointers to the pages containing external resources - * for this buffer - * @nr_extres_pages: number of pages in @extres_pages - */ -struct kbase_debug_copy_buffer { - size_t size; - struct page **pages; - bool is_vmalloc; - int nr_pages; - size_t offset; - struct kbase_mem_phy_alloc *gpu_alloc; - - struct page **extres_pages; - int nr_extres_pages; -}; - -static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) -{ - return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); -} - -/* - * Theory of operations: - * - * Atom objects are statically allocated within the context structure. - * - * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set. - */ - -#define KBASE_JD_DEP_QUEUE_SIZE 256 - -/** - * struct kbase_jd_context - per context object encapsulating all the Job dispatcher - * related state. - * @lock: lock to serialize the updates made to the Job dispatcher - * state and kbase_jd_atom objects. - * @sched_info: Structure encapsulating all the Job scheduling info. - * @atoms: Array of the objects representing atoms, containing - * the complete state and attributes of an atom. - * @job_nr: Tracks the number of atoms being processed by the - * kbase. This includes atoms that are not tracked by - * scheduler: 'not ready to run' & 'dependency-only' jobs. - * @zero_jobs_wait: Waitq that reflects whether there are no jobs - * (including SW-only dependency jobs). This is set - * when no jobs are present on the ctx, and clear when - * there are jobs. - * This must be updated atomically with @job_nr. - * note: Job Dispatcher knows about more jobs than the - * Job Scheduler as it is unaware of jobs that are - * blocked on dependencies and SW-only dependency jobs. 
- * This waitq can be waited upon to find out when the - * context jobs are all done/cancelled (including those - * that might've been blocked on dependencies) - and so, - * whether it can be terminated. However, it should only - * be terminated once it is not present in the run-pool. - * Since the waitq is only set under @lock, the waiter - * should also briefly obtain and drop @lock to guarantee - * that the setter has completed its work on the kbase_context - * @job_done_wq: Workqueue to which the per atom work item is queued - * for bottom half processing when the atom completes - * execution on GPU or the input fence get signaled. - * @tb_lock: Lock to serialize the write access made to @tb to - * to store the register access trace messages. - * @tb: Pointer to the Userspace accessible buffer storing - * the trace messages for register read/write accesses - * made by the Kbase. The buffer is filled in circular - * fashion. - * @tb_wrap_offset: Offset to the end location in the trace buffer, the - * write pointer is moved to the beginning on reaching - * this offset. - * @work_id: atomic variable used for GPU tracepoints, incremented - * on every call to base_jd_submit. - */ -struct kbase_jd_context { - struct mutex lock; - struct kbasep_js_kctx_info sched_info; - struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; - - u32 job_nr; - - wait_queue_head_t zero_jobs_wait; - - struct workqueue_struct *job_done_wq; - - spinlock_t tb_lock; - u32 *tb; - size_t tb_wrap_offset; - -#ifdef CONFIG_GPU_TRACEPOINTS - atomic_t work_id; -#endif -}; - -struct kbase_device_info { - u32 features; -}; - -/** Poking state for BASE_HW_ISSUE_8316 */ -enum { - KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0, - KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1 -}; - -/** Poking state for BASE_HW_ISSUE_8316 */ -typedef u32 kbase_as_poke_state; - -struct kbase_mmu_setup { - u64 transtab; - u64 memattr; - u64 transcfg; -}; - -/** - * struct kbase_fault - object containing data relating to a page or bus fault. - * @addr: Records the faulting address. - * @extra_addr: Records the secondary fault address. - * @status: Records the fault status as reported by Hw. - * @protected_mode: Flag indicating whether the fault occurred in protected mode - * or not. - */ -struct kbase_fault { - u64 addr; - u64 extra_addr; - u32 status; - bool protected_mode; -}; - -/** - * struct kbase_as - object representing an address space of GPU. - * @number: Index at which this address space structure is present - * in an array of address space structures embedded inside the - * struct kbase_device. - * @pf_wq: Workqueue for processing work items related to Bus fault - * and Page fault handling. - * @work_pagefault: Work item for the Page fault handling. - * @work_busfault: Work item for the Bus fault handling. - * @fault_type: Type of fault which occured for this address space, - * regular/unexpected Bus or Page fault. - * @pf_data: Data relating to page fault. - * @bf_data: Data relating to bus fault. - * @current_setup: Stores the MMU configuration for this address space. - * @poke_wq: Workqueue to process the work items queue for poking the - * MMU as a WA for BASE_HW_ISSUE_8316. - * @poke_work: Work item to do the poking of MMU for this address space. - * @poke_refcount: Refcount for the need of poking MMU. While the refcount is - * non zero the poking of MMU will continue. - * Protected by hwaccess_lock. - * @poke_state: State indicating whether poking is in progress or it has - * been stopped. Protected by hwaccess_lock. 
- * @poke_timer: Timer used to schedule the poking at regular intervals. - */ -struct kbase_as { - int number; - struct workqueue_struct *pf_wq; - struct work_struct work_pagefault; - struct work_struct work_busfault; - enum kbase_mmu_fault_type fault_type; - struct kbase_fault pf_data; - struct kbase_fault bf_data; - struct kbase_mmu_setup current_setup; - struct workqueue_struct *poke_wq; - struct work_struct poke_work; - int poke_refcount; - kbase_as_poke_state poke_state; - struct hrtimer poke_timer; -}; - -/** - * struct kbase_mmu_table - object representing a set of GPU page tables - * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries - * of top & intermediate level page tables to avoid - * repeated calls to kmap_atomic during the MMU teardown. - * @mmu_lock: Lock to serialize the accesses made to multi level GPU - * page tables - * @pgd: Physical address of the page allocated for the top - * level page table of the context, this is used for - * MMU HW programming as the address translation will - * start from the top level page table. - * @kctx: If this set of MMU tables belongs to a context then - * this is a back-reference to the context, otherwise - * it is NULL - */ -struct kbase_mmu_table { - u64 *mmu_teardown_pages; - struct mutex mmu_lock; - phys_addr_t pgd; - struct kbase_context *kctx; -}; - -static inline int kbase_as_has_bus_fault(struct kbase_as *as) -{ - return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS; -} - -static inline int kbase_as_has_page_fault(struct kbase_as *as) -{ - return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE; -} - -struct kbasep_mem_device { - atomic_t used_pages; /* Tracks usage of OS shared memory. Updated - when OS memory is allocated/freed. */ - -}; - -#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X - -enum kbase_trace_code { - /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE - * THIS MUST BE USED AT THE START OF THE ENUM */ -#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X) -#include "mali_kbase_trace_defs.h" -#undef KBASE_TRACE_CODE_MAKE_CODE - /* Comma on its own, to extend the list */ - , - /* Must be the last in the enum */ - KBASE_TRACE_CODE_COUNT -}; - -#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0) -#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1) - -/** - * struct kbase_trace - object representing a trace message added to trace buffer - * kbase_device::trace_rbuf - * @timestamp: CPU timestamp at which the trace message was added. - * @thread_id: id of the thread in the context of which trace message - * was added. - * @cpu: indicates which CPU the @thread_id was scheduled on when - * the trace message was added. - * @ctx: Pointer to the kbase context for which the trace message - * was added. Will be NULL for certain trace messages like - * for traces added corresponding to power management events. - * Will point to the appropriate context corresponding to - * job-slot & context's reference count related events. - * @katom: indicates if the trace message has atom related info. - * @atom_number: id of the atom for which trace message was added. - * Only valid if @katom is true. - * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. - * Only valid if @katom is true. - * @gpu_addr: GPU address of the job-chain represented by atom. Could - * be valid even if @katom is false. - * @info_val: value specific to the type of event being traced. For the - * case where @katom is true, will be set to atom's affinity, - * i.e. bitmask of shader cores chosen for atom's execution. 
- * @code: Identifies the event, refer enum kbase_trace_code. - * @jobslot: job-slot for which trace message was added, valid only for - * job-slot management events. - * @refcount: reference count for the context, valid for certain events - * related to scheduler core and policy. - * @flags: indicates if info related to @jobslot & @refcount is present - * in the trace message, used during dumping of the message. - */ -struct kbase_trace { - struct timespec timestamp; - u32 thread_id; - u32 cpu; - void *ctx; - bool katom; - int atom_number; - u64 atom_udata[2]; - u64 gpu_addr; - unsigned long info_val; - u8 code; - u8 jobslot; - u8 refcount; - u8 flags; -}; - -struct kbasep_kctx_list_element { - struct list_head link; - struct kbase_context *kctx; -}; - -/** - * Data stored per device for power management. - * - * This structure contains data for the power management framework. There is one - * instance of this structure per device in the system. - */ -struct kbase_pm_device_data { - /** - * The lock protecting Power Management structures accessed outside of - * IRQ. - * - * This lock must also be held whenever the GPU is being powered on or - * off. - */ - struct mutex lock; - - /** - * The reference count of active contexts on this device. Note that - * some code paths keep shaders/the tiler powered whilst this is 0. Use - * kbase_pm_is_active() instead to check for such cases. - */ - int active_count; - /** Flag indicating suspending/suspended */ - bool suspending; - /* Wait queue set when active_count == 0 */ - wait_queue_head_t zero_active_count_wait; - - /** - * Bit masks identifying the available shader cores that are specified - * via sysfs. One mask per job slot. - */ - u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; - u64 debug_core_mask_all; - - /** - * Callback for initializing the runtime power management. - * - * @param kbdev The kbase device - * - * @return 0 on success, else error code - */ - int (*callback_power_runtime_init)(struct kbase_device *kbdev); - - /** - * Callback for terminating the runtime power management. - * - * @param kbdev The kbase device - */ - void (*callback_power_runtime_term)(struct kbase_device *kbdev); - - /* Time in milliseconds between each dvfs sample */ - u32 dvfs_period; - - struct kbase_pm_backend_data backend; -}; - -/** - * struct kbase_mem_pool - Page based memory pool for kctx/kbdev - * @kbdev: Kbase device where memory is used - * @cur_size: Number of free pages currently in the pool (may exceed - * @max_size in some corner cases) - * @max_size: Maximum number of free pages in the pool - * @order: order = 0 refers to a pool of 4 KB pages - * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) - * @pool_lock: Lock protecting the pool - must be held when modifying - * @cur_size and @page_list - * @page_list: List of free pages in the pool - * @reclaim: Shrinker for kernel reclaim of free pages - * @next_pool: Pointer to next pool where pages can be allocated when this - * pool is empty. Pages will spill over to the next pool when - * this pool is full. Can be NULL if there is no next pool. 
- * @dying: true if the pool is being terminated, and any ongoing - * operations should be abandoned - * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from - * this pool, eg during a grow operation - */ -struct kbase_mem_pool { - struct kbase_device *kbdev; - size_t cur_size; - size_t max_size; - size_t order; - spinlock_t pool_lock; - struct list_head page_list; - struct shrinker reclaim; - - struct kbase_mem_pool *next_pool; - - bool dying; - bool dont_reclaim; -}; - -/** - * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP - * frequency, and real frequency and core mask - * @opp_freq: Nominal OPP frequency - * @real_freq: Real GPU frequency - * @core_mask: Shader core mask - */ -struct kbase_devfreq_opp { - u64 opp_freq; - u64 real_freq; - u64 core_mask; -}; - -/* MMU mode flags */ -#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */ - -/** - * struct kbase_mmu_mode - object containing pointer to methods invoked for - * programming the MMU, as per the MMU mode supported - * by Hw. - * @update: enable & setup/configure one of the GPU address space. - * @get_as_setup: retrieve the configuration of one of the GPU address space. - * @disable_as: disable one of the GPU address space. - * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry. - * @ate_is_valid: check if the pte is a valid address translation entry - * encoding the physical address of the actual mapped page. - * @pte_is_valid: check if the pte is a valid entry encoding the physical - * address of the next lower level page table. - * @entry_set_ate: program the pte to be a valid address translation entry to - * encode the physical address of the actual page being mapped. - * @entry_set_pte: program the pte to be a valid entry to encode the physical - * address of the next lower level page table. - * @entry_invalidate: clear out or invalidate the pte. - * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. - */ -struct kbase_mmu_mode { - void (*update)(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr); - void (*get_as_setup)(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup); - void (*disable_as)(struct kbase_device *kbdev, int as_nr); - phys_addr_t (*pte_to_phy_addr)(u64 entry); - int (*ate_is_valid)(u64 ate, unsigned int level); - int (*pte_is_valid)(u64 pte, unsigned int level); - void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, - unsigned long flags, unsigned int level); - void (*entry_set_pte)(u64 *entry, phys_addr_t phy); - void (*entry_invalidate)(u64 *entry); - unsigned long flags; -}; - -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); -struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); - - -#define DEVNAME_SIZE 16 - - -/** - * struct kbase_device - Object representing an instance of GPU platform device, - * allocated from the probe method of mali driver. - * @hw_quirks_sc: Configuration to be used for the shader cores as per - * the HW issues present in the GPU. - * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW - * issues present in the GPU. - * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW - * issues present in the GPU. - * @hw_quirks_jm: Configuration to be used for the Job Manager as per - * the HW issues present in the GPU. - * @entry: Links the device instance to the global list of GPU - * devices. 
The list would have as many entries as there - * are GPU device instances. - * @dev: Pointer to the kernel's generic/base representation - * of the GPU platform device. - * @mdev: Pointer to the miscellaneous device registered to - * provide Userspace access to kernel driver through the - * device file /dev/malixx. - * @reg_start: Base address of the region in physical address space - * where GPU registers have been mapped. - * @reg_size: Size of the region containing GPU registers - * @reg: Kernel virtual address of the region containing GPU - * registers, using which Driver will access the registers. - * @irqs: Array containing IRQ resource info for 3 types of - * interrupts : Job scheduling, MMU & GPU events (like - * power management, cache etc.) - * @clock: Pointer to the input clock resource (having an id of 0), - * referenced by the GPU device node. - * @regulator: Pointer to the struct corresponding to the regulator - * for GPU device - * @devname: string containing the name used for GPU device instance, - * miscellaneous device is registered using the same name. - * @model: Pointer, valid only when Driver is compiled to not access - * the real GPU Hw, to the dummy model which tries to mimic - * to some extent the state & behavior of GPU Hw in response - * to the register accesses made by the Driver. - * @irq_slab: slab cache for allocating the work items queued when - * model mimics raising of IRQ to cause an interrupt on CPU. - * @irq_workq: workqueue for processing the irq work items. - * @serving_job_irq: function to execute work items queued when model mimics - * the raising of JS irq, mimics the interrupt handler - * processing JS interrupts. - * @serving_gpu_irq: function to execute work items queued when model mimics - * the raising of GPU irq, mimics the interrupt handler - * processing GPU interrupts. - * @serving_mmu_irq: function to execute work items queued when model mimics - * the raising of MMU irq, mimics the interrupt handler - * processing MMU interrupts. - * @reg_op_lock: lock used by model to serialize the handling of register - * accesses made by the driver. - * @pm: Per device object for storing data for power management - * framework. - * @js_data: Per device object encapsulating the current context of - * Job Scheduler, which is global to the device and is not - * tied to any particular struct kbase_context running on - * the device - * @mem_pool: Object containing the state for global pool of 4KB size - * physical pages which can be used by all the contexts. - * @lp_mem_pool: Object containing the state for global pool of 2MB size - * physical pages which can be used by all the contexts. - * @memdev: keeps track of the in use physical pages allocated by - * the Driver. - * @mmu_mode: Pointer to the object containing methods for programming - * the MMU, depending on the type of MMU supported by Hw. - * @as: Array of objects representing address spaces of GPU. - * @as_free: Bitpattern of free/available GPU address spaces. - * @as_to_kctx: Array of pointers to struct kbase_context, having - * GPU adrress spaces assigned to them. - * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask - * register used in the handling of Bus & Page faults. - * @gpu_props: Object containing complete information about the - * configuration/properties of GPU HW device in use. - * @hw_issues_mask: List of SW workarounds for HW issues - * @hw_features_mask: List of available HW features. 
- * @disjoint_event: struct for keeping track of the disjoint information, - * that whether the GPU is in a disjoint state and the - * number of disjoint events that have occurred on GPU. - * @nr_hw_address_spaces: Number of address spaces actually available in the - * GPU, remains constant after driver initialisation. - * @nr_user_address_spaces: Number of address spaces available to user contexts - * @hwcnt: Structure used for instrumentation and HW counters - * dumping - * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. - * @hwcnt_gpu_ctx: Context for GPU hardware counter access. - * @hwaccess_lock must be held when calling - * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. - * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. - * @vinstr_ctx: vinstr context created per device - * @trace_lock: Lock to serialize the access to trace buffer. - * @trace_first_out: Index/offset in the trace buffer at which the first - * unread message is present. - * @trace_next_in: Index/offset in the trace buffer at which the new - * message will be written. - * @trace_rbuf: Pointer to the buffer storing debug messages/prints - * tracing the various events in Driver. - * The buffer is filled in circular fashion. - * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to - * complete for the GPU jobs before proceeding with the - * GPU reset. - * @cache_clean_in_progress: Set when a cache clean has been started, and - * cleared when it has finished. This prevents multiple - * cache cleans being done simultaneously. - * @cache_clean_queued: Set if a cache clean is invoked while another is in - * progress. If this happens, another cache clean needs - * to be triggered immediately after completion of the - * current one. - * @cache_clean_wait: Signalled when a cache clean has finished. - * @platform_context: Platform specific private data to be accessed by - * platform specific config files only. - * @kctx_list: List of kbase_contexts created for the device, including - * the kbase_context created for vinstr_ctx. - * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. - * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed - * to devfreq_add_device() to add devfreq feature to Mali - * GPU device. - * @devfreq: Pointer to devfreq structure for Mali GPU device, - * returned on the call to devfreq_add_device(). - * @current_freq: The real frequency, corresponding to @current_nominal_freq, - * at which the Mali GPU device is currently operating, as - * retrieved from @opp_table in the target callback of - * @devfreq_profile. - * @current_nominal_freq: The nominal frequency currently used for the Mali GPU - * device as retrieved through devfreq_recommended_opp() - * using the freq value passed as an argument to target - * callback of @devfreq_profile - * @current_voltage: The voltage corresponding to @current_nominal_freq, as - * retrieved through dev_pm_opp_get_voltage(). - * @current_core_mask: bitmask of shader cores that are currently desired & - * enabled, corresponding to @current_nominal_freq as - * retrieved from @opp_table in the target callback of - * @devfreq_profile. - * @opp_table: Pointer to the lookup table for converting between nominal - * OPP (operating performance point) frequency, and real - * frequency and core mask. This table is constructed according - * to operating-points-v2-mali table in devicetree. - * @num_opps: Number of operating performance points available for the Mali - * GPU device. 
- * @devfreq_cooling: Pointer returned on registering devfreq cooling device - * corresponding to @devfreq. - * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected - * mode. It is a sticky flag which is cleared by IPA - * once it has made use of information that GPU had - * previously entered protected mode. - * @ipa: Top level structure for IPA, containing pointers to both - * configured & fallback models. - * @timeline: Stores the global timeline tracking information. - * @job_fault_debug: Flag to control the dumping of debug data for job faults, - * set when the 'job_fault' debugfs file is opened. - * @mali_debugfs_directory: Root directory for the debugfs files created by the driver - * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing - * a sub-directory for every context. - * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault - * has occurred. - * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the - * occurrence of a job fault. - * @job_fault_resume_wq: Waitqueue on which every context with a faulty job wait - * for the job fault dumping to complete before they can - * do bottom half of job done for the atoms which followed - * the faulty atom. - * @job_fault_resume_workq: workqueue to process the work items queued for the faulty - * atoms, whereby the work item function waits for the dumping - * to get completed. - * @job_fault_event_list: List of atoms, each belonging to a different context, which - * generated a job fault. - * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list - * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs - * file "read_register". - * @force_replay_limit: Number of gpu jobs, having replay atoms associated with them, - * that are run before a job is forced to fail and replay. - * Set to 0 to disable forced failures. - * @force_replay_count: Count of gpu jobs, having replay atoms associated with them, - * between forced failures. Incremented on each gpu job which - * has replay atoms dependent on it. A gpu job is forced to - * fail once this is greater than or equal to @force_replay_limit - * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay - * job atoms to consider the associated gpu job for forceful - * failure and replay. May be zero - * @force_replay_random: Set to 1 to randomize the @force_replay_limit, in the - * range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT. - * @ctx_num: Total number of contexts created for the device. - * @io_history: Pointer to an object keeping a track of all recent - * register accesses. The history of register accesses - * can be read through "regs_history" debugfs file. - * @hwaccess: Contains a pointer to active kbase context and GPU - * backend specific data for HW access layer. - * @faults_pending: Count of page/bus faults waiting for bottom half processing - * via workqueues. - * @poweroff_pending: Set when power off operation for GPU is started, reset when - * power on for GPU is started. - * @infinite_cache_active_default: Set to enable using infinite cache for all the - * allocations of a new context. - * @mem_pool_max_size_default: Initial/default value for the maximum size of both - * types of pool created for a new context. - * @current_gpu_coherency_mode: coherency mode in use, which can be different - * from @system_coherency, when using protected mode. 
- * @system_coherency: coherency mode as retrieved from the device tree. - * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled. - * @snoop_enable_smc: SMC function ID to call into Trusted firmware to - * enable cache snooping. Value of 0 indicates that it - * is not used. - * @snoop_disable_smc: SMC function ID to call disable cache snooping. - * @protected_ops: Pointer to the methods for switching in or out of the - * protected mode, as per the @protected_dev being used. - * @protected_dev: Pointer to the protected mode switcher device attached - * to the GPU device retrieved through device tree if - * GPU do not support protected mode switching natively. - * @protected_mode: set to TRUE when GPU is put into protected mode - * @protected_mode_transition: set to TRUE when GPU is transitioning into or - * out of protected mode. - * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be - * enabled. Counters must be disabled before transition - * into protected mode. - * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not - * enabled. - * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware - * counters, used if atomic disable is not possible. - * @protected_mode_support: set to true if protected mode is supported. - * @buslogger: Pointer to the structure required for interfacing - * with the bus logger module to set the size of buffer - * used by the module for capturing bus logs. - * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of - * IRQ + bottom half is being done, to prevent the writes - * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. - * @inited_subsys: Bitmap of inited sub systems at the time of device probe. - * Used during device remove or for handling error in probe. - * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize - * the updates made to Job dispatcher + scheduler states. - * @mmu_hw_mutex: Protects access to MMU operations and address space - * related state. - * @serialize_jobs: Currently used mode for serialization of jobs, both - * intra & inter slots serialization is supported. - * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken - * when GWT is enabled. Used to restore the original value - * on disabling of GWT. 
- * @js_ctx_scheduling_mode: Context scheduling mode currently being used by - * Job Scheduler - */ -struct kbase_device { - u32 hw_quirks_sc; - u32 hw_quirks_tiler; - u32 hw_quirks_mmu; - u32 hw_quirks_jm; - - struct list_head entry; - struct device *dev; - struct miscdevice mdev; - u64 reg_start; - size_t reg_size; - void __iomem *reg; - - struct { - int irq; - int flags; - } irqs[3]; - - struct clk *clock; -#ifdef CONFIG_REGULATOR - struct regulator *regulator; -#endif - char devname[DEVNAME_SIZE]; - -#ifdef CONFIG_MALI_NO_MALI - void *model; - struct kmem_cache *irq_slab; - struct workqueue_struct *irq_workq; - atomic_t serving_job_irq; - atomic_t serving_gpu_irq; - atomic_t serving_mmu_irq; - spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_NO_MALI */ - - struct kbase_pm_device_data pm; - struct kbasep_js_device_data js_data; - struct kbase_mem_pool mem_pool; - struct kbase_mem_pool lp_mem_pool; - struct kbasep_mem_device memdev; - struct kbase_mmu_mode const *mmu_mode; - - struct kbase_as as[BASE_MAX_NR_AS]; - u16 as_free; /* Bitpattern of free Address Spaces */ - struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; - - spinlock_t mmu_mask_change; - - struct kbase_gpu_props gpu_props; - - unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; - - struct { - atomic_t count; - atomic_t state; - } disjoint_event; - - s8 nr_hw_address_spaces; - s8 nr_user_address_spaces; - - struct kbase_hwcnt { - /* The lock should be used when accessing any of the following members */ - spinlock_t lock; - - struct kbase_context *kctx; - u64 addr; - u64 addr_bytes; - - struct kbase_instr_backend backend; - } hwcnt; - - struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; - struct kbase_hwcnt_context *hwcnt_gpu_ctx; - struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; - struct kbase_vinstr_context *vinstr_ctx; - -#if KBASE_TRACE_ENABLE - spinlock_t trace_lock; - u16 trace_first_out; - u16 trace_next_in; - struct kbase_trace *trace_rbuf; -#endif - - u32 reset_timeout_ms; - - bool cache_clean_in_progress; - bool cache_clean_queued; - wait_queue_head_t cache_clean_wait; - - void *platform_context; - - struct list_head kctx_list; - struct mutex kctx_list_lock; - -#ifdef CONFIG_MALI_DEVFREQ - struct devfreq_dev_profile devfreq_profile; - struct devfreq *devfreq; - unsigned long current_freq; - unsigned long current_nominal_freq; - unsigned long current_voltage; - u64 current_core_mask; - struct kbase_devfreq_opp *opp_table; - int num_opps; - struct kbasep_pm_metrics last_devfreq_metrics; -#ifdef CONFIG_DEVFREQ_THERMAL -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) - struct devfreq_cooling_device *devfreq_cooling; -#else - struct thermal_cooling_device *devfreq_cooling; -#endif - bool ipa_protection_mode_switched; - struct { - /* Access to this struct must be with ipa.lock held */ - struct mutex lock; - struct kbase_ipa_model *configured_model; - struct kbase_ipa_model *fallback_model; - - /* Values of the PM utilization metrics from last time the - * power model was invoked. The utilization is calculated as - * the difference between last_metrics and the current values. 
- */ - struct kbasep_pm_metrics last_metrics; - /* Model data to pass to ipa_gpu_active/idle() */ - struct kbase_ipa_model_vinstr_data *model_data; - - /* true if use of fallback model has been forced by the User */ - bool force_fallback_model; - } ipa; -#endif /* CONFIG_DEVFREQ_THERMAL */ -#endif /* CONFIG_MALI_DEVFREQ */ - - bool job_fault_debug; - -#ifdef CONFIG_DEBUG_FS - struct dentry *mali_debugfs_directory; - struct dentry *debugfs_ctx_directory; - -#ifdef CONFIG_MALI_DEBUG - u64 debugfs_as_read_bitmap; -#endif /* CONFIG_MALI_DEBUG */ - - wait_queue_head_t job_fault_wq; - wait_queue_head_t job_fault_resume_wq; - struct workqueue_struct *job_fault_resume_workq; - struct list_head job_fault_event_list; - spinlock_t job_fault_event_lock; - -#if !MALI_CUSTOMER_RELEASE - struct { - u16 reg_offset; - } regs_dump_debugfs_data; -#endif /* !MALI_CUSTOMER_RELEASE */ -#endif /* CONFIG_DEBUG_FS */ - - -#if MALI_CUSTOMER_RELEASE == 0 - int force_replay_limit; - int force_replay_count; - base_jd_core_req force_replay_core_req; - bool force_replay_random; -#endif - - atomic_t ctx_num; - -#ifdef CONFIG_DEBUG_FS - struct kbase_io_history io_history; -#endif /* CONFIG_DEBUG_FS */ - - struct kbase_hwaccess_data hwaccess; - - atomic_t faults_pending; - - bool poweroff_pending; - - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - bool infinite_cache_active_default; -#else - u32 infinite_cache_active_default; -#endif - size_t mem_pool_max_size_default; - - u32 current_gpu_coherency_mode; - u32 system_coherency; - - bool cci_snoop_enabled; - - u32 snoop_enable_smc; - u32 snoop_disable_smc; - - struct protected_mode_ops *protected_ops; - - struct protected_mode_device *protected_dev; - - bool protected_mode; - - bool protected_mode_transition; - - bool protected_mode_hwcnt_desired; - - bool protected_mode_hwcnt_disabled; - - struct work_struct protected_mode_hwcnt_disable_work; - - bool protected_mode_support; - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - struct bus_logger_client *buslogger; -#endif - - bool irq_reset_flush; - - u32 inited_subsys; - - spinlock_t hwaccess_lock; - - struct mutex mmu_hw_mutex; - - /* See KBASE_SERIALIZE_* for details */ - u8 serialize_jobs; - -#ifdef CONFIG_MALI_CINSTR_GWT - u8 backup_serialize_jobs; -#endif - - /* See KBASE_JS_*_PRIORITY_MODE for details. */ - u32 js_ctx_scheduling_mode; - -}; - -/** - * struct jsctx_queue - JS context atom queue - * @runnable_tree: Root of RB-tree containing currently runnable atoms on this - * job slot. - * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot - * dependencies. Atoms on this list will be moved to the - * runnable_tree when the blocking atom completes. - * - * hwaccess_lock must be held when accessing this structure. - */ -struct jsctx_queue { - struct rb_root runnable_tree; - struct list_head x_dep_head; -}; - - -#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ - (((minor) & 0xFFF) << 8) | \ - ((0 & 0xFF) << 0)) - -/** - * enum kbase_context_flags - Flags for kbase contexts - * - * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit - * process on a 64-bit kernel. - * - * @KCTX_RUNNABLE_REF: Set when context is counted in - * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. - * - * @KCTX_ACTIVE: Set when the context is active. - * - * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this - * context. - * - * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been - * initialized. 
- * - * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new - * allocations. Existing allocations will not change. - * - * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. - * - * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept - * scheduled in. - * - * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. - * This is only ever updated whilst the jsctx_mutex is held. - * - * @KCTX_DYING: Set when the context process is in the process of being evicted. - * - * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this - * context, to disable use of implicit dma-buf fences. This is used to avoid - * potential synchronization deadlocks. - * - * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory - * allocations. For 64-bit clients it is enabled by default, and disabled by - * default on 32-bit clients. Being able to clear this flag is only used for - * testing purposes of the custom zone allocation on 64-bit user-space builds, - * where we also require more control than is available through e.g. the JIT - * allocation mechanism. However, the 64-bit user-space client must still - * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT - * - * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled - * from it for job slot 0. This is reset when the context first goes active or - * is re-activated on that slot. - * - * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled - * from it for job slot 1. This is reset when the context first goes active or - * is re-activated on that slot. - * - * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled - * from it for job slot 2. This is reset when the context first goes active or - * is re-activated on that slot. - * - * All members need to be separate bits. This enum is intended for use in a - * bitmask where multiple values get OR-ed together. - */ -enum kbase_context_flags { - KCTX_COMPAT = 1U << 0, - KCTX_RUNNABLE_REF = 1U << 1, - KCTX_ACTIVE = 1U << 2, - KCTX_PULLED = 1U << 3, - KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, - KCTX_INFINITE_CACHE = 1U << 5, - KCTX_SUBMIT_DISABLED = 1U << 6, - KCTX_PRIVILEGED = 1U << 7, - KCTX_SCHEDULED = 1U << 8, - KCTX_DYING = 1U << 9, - KCTX_NO_IMPLICIT_SYNC = 1U << 10, - KCTX_FORCE_SAME_VA = 1U << 11, - KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, - KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, - KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, -}; - -struct kbase_sub_alloc { - struct list_head link; - struct page *page; - DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); -}; - - -/** - * struct kbase_context - Object representing an entity, among which GPU is - * scheduled and gets its own GPU address space. - * Created when the device file /dev/malixx is opened. - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. - * @kbdev: Pointer to the Kbase device for which the context is created. - * @mmu: Structure holding details of the MMU tables for this - * context - * @id: Unique indentifier for the context, indicates the number of - * contexts which have been created for the device so far. - * @api_version: contains the version number for User/kernel interface, - * used for compatibility check. - * @event_list: list of posted events about completed atoms, to be sent to - * event handling thread of Userpsace. 
- * @event_coalesce_list: list containing events corresponding to successive atoms
- * which have requested deferred delivery of the completion
- * events to Userspace.
- * @event_mutex: Lock to protect the concurrent access to @event_list &
- * @event_coalesce_list.
- * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver
- * should stop posting events and also inform event handling
- * thread that context termination is in progress.
- * @event_workq: Workqueue for processing work items corresponding to atoms
- * that do not return an event to Userspace or have to perform
- * a replay job
- * @event_count: Count of the posted events to be consumed by Userspace.
- * @event_coalesce_count: Count of the events present in @event_coalesce_list.
- * @flags: bitmap of enums from kbase_context_flags, indicating the
- * state & attributes for the context.
- * @setup_complete: Indicates if the setup for context has completed, i.e.
- * flags have been set for the context. Driver allows only
- * 2 ioctls until the setup is done. Valid only for
- * @api_version value 0.
- * @setup_in_progress: Indicates if the context's setup is in progress and other
- * setup calls during that shall be rejected.
- * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations,
- * which can alias a number of memory regions. The page
- * represents a region where it is mapped with a write-alloc
- * cache setup, typically used when the write result of the
- * GPU isn't needed, but the GPU must write anyway.
- * @mem_partials_lock: Lock for protecting the operations done on the elements
- * added to @mem_partials list.
- * @mem_partials: List head for the list of large pages, 2MB in size, which
- * have been split into 4 KB pages and are used
- * partially for the allocations >= 2 MB in size.
- * @reg_lock: Lock used for GPU virtual address space management operations,
- * like adding/freeing a memory region in the address space.
- * Can be converted to a rwlock?
- * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA
- * zone of the GPU virtual address space. Used for allocations
- * having the same value for GPU & CPU virtual address.
- * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA
- * zone of the GPU virtual address space.
- * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
- * zone of the GPU virtual address space. Used for GPU-executable
- * allocations which don't need the SAME_VA property.
- * @cookies: Bitmask of BITS_PER_LONG bits, used mainly for
- * SAME_VA allocations to defer the reservation of memory region
- * (from the GPU virtual address space) from base_mem_alloc
- * ioctl to mmap system call. This helps return unique
- * handles, disguised as GPU VA, to Userspace from base_mem_alloc
- * and later retrieving the pointer to memory region structure
- * in the mmap handler.
- * @pending_regions: Array containing pointers to memory region structures,
- * used in conjunction with @cookies bitmask mainly for
- * providing a mechanism to have the same value for CPU &
- * GPU virtual address.
- * @event_queue: Wait queue used for blocking the thread, which consumes
- * the base_jd_event corresponding to an atom, when there
- * are no more posted events.
- * @tgid: thread group id of the process, whose thread opened the
- * device file /dev/malixx instance to create a context.
- * @pid: id of the thread, corresponding to process @tgid, which
- * actually opened the device file.
- * @jctx: object encapsulating all the Job dispatcher related state,
- * including the array of atoms.
- * @used_pages: Keeps track of the number of 4KB physical pages in use
- * for the context.
- * @nonmapped_pages: Updated in the same way as @used_pages, except for the case
- * when the special tracking page is freed by userspace where it
- * is reset to 0.
- * @permanent_mapped_pages: Usage count of permanently mapped memory
- * @mem_pool: Object containing the state for the context specific pool of
- * 4KB size physical pages.
- * @lp_mem_pool: Object containing the state for the context specific pool of
- * 2MB size physical pages.
- * @reclaim: Shrinker object registered with the kernel containing
- * the pointer to callback function which is invoked under
- * low memory conditions. In the callback function Driver
- * frees up the memory for allocations marked as
- * evictable/reclaimable.
- * @evict_list: List head for the list containing the allocations which
- * can be evicted or freed up in the shrinker callback.
- * @waiting_soft_jobs: List head for the list containing softjob atoms, which
- * are either waiting for the event set operation, or waiting
- * for the signaling of input fence or waiting for the GPU
- * device to be powered on so as to dump the CPU/GPU timestamps.
- * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
- * accesses.
- * @dma_fence: Object containing list head for the list of dma-buf fence
- * waiting atoms and the waitqueue to process the work item
- * queued for the atoms blocked on the signaling of dma-buf
- * fences.
- * @as_nr: id of the address space being used for the scheduled in
- * context. This is effectively part of the Run Pool, because
- * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
- * the context is scheduled in. The hwaccess_lock must be held
- * whilst accessing this.
- * If the context relating to this value of as_nr is required,
- * then the context must be retained to ensure that it doesn't
- * disappear whilst it is being used. Alternatively, hwaccess_lock
- * can be held to ensure the context doesn't disappear (but this
- * has restrictions on what other locks can be taken simultaneously).
- * @refcount: Keeps track of the number of users of this context. A user
- * can be a job that is available for execution, instrumentation
- * needing to 'pin' a context for counter collection, etc.
- * If the refcount reaches 0 then this context is considered
- * inactive and the previously programmed AS might be cleared
- * at any point.
- * Generally the reference count is incremented when the context
- * is scheduled in and an atom is pulled from the context's per
- * slot runnable tree.
- * @mm_update_lock: lock used for handling of special tracking page.
- * @process_mm: Pointer to the memory descriptor of the process which
- * created the context. Used for accounting the physical
- * pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
- * @same_va_end: End address of the SAME_VA zone (in 4KB page units)
- * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units)
- * or U64_MAX if the EXEC_VA zone is uninitialized.
- * @gpu_va_end: End address of the GPU va space (in 4KB page units)
- * @jit_va: Indicates if a JIT_VA zone has been created.
- * @timeline: Object tracking the number of atoms currently in flight for
- * the context and thread group id of the process, i.e. @tgid.
- * @mem_profile_data: Buffer containing the profiling information provided by
- * Userspace, can be read through the mem_profile debugfs file.
- * @mem_profile_size: Size of the @mem_profile_data.
- * @mem_profile_lock: Lock to serialize the operations related to mem_profile
- * debugfs file.
- * @kctx_dentry: Pointer to the debugfs directory created for every context,
- * inside kbase_device::debugfs_ctx_directory, containing
- * context specific files.
- * @reg_dump: Buffer containing a register offset & value pair, used
- * for dumping job fault debug info.
- * @job_fault_count: Indicates that a job fault occurred for the context and
- * dumping of its debug info is in progress.
- * @job_fault_resume_event_list: List containing atoms completed after the faulty
- * atom but before the debug data for faulty atom was dumped.
- * @jsctx_queue: Per slot & priority arrays of object containing the root
- * of RB-tree holding currently runnable atoms on the job slot
- * and the head item of the linked list of atoms blocked on
- * cross-slot dependencies.
- * @atoms_pulled: Total number of atoms currently pulled from the context.
- * @atoms_pulled_slot: Per slot count of the number of atoms currently pulled
- * from the context.
- * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently
- * pulled from the context. hwaccess_lock shall be held when
- * accessing it.
- * @blocked_js: Indicates if the context is blocked from submitting atoms
- * on a slot at a given priority. This is set to true, when
- * the atom corresponding to context is soft/hard stopped or
- * removed from the HEAD_NEXT register in response to
- * soft/hard stop.
- * @slots_pullable: Bitmask of slots, indicating the slots for which the
- * context has pullable atoms in the runnable tree.
- * @work: Work structure used for deferred ASID assignment.
- * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters
- * client, there can be only one such client per kbase
- * context.
- * @legacy_hwcnt_lock: Lock used to prevent concurrent access to
- * @legacy_hwcnt_cli.
- * @completed_jobs: List containing completed atoms for which base_jd_event is
- * to be posted.
- * @work_count: Number of work items, corresponding to atoms, currently
- * pending on job_done workqueue of @jctx.
- * @soft_job_timeout: Timer object used for failing/cancelling the waiting
- * soft-jobs which have been blocked for more than the
- * timeout value used for the soft-jobs
- * @jit_alloc: Array of 256 pointers to GPU memory regions, used for
- * JIT allocations.
- * @jit_max_allocations: Maximum number of JIT allocations allowed at once.
- * @jit_current_allocations: Current number of in-flight JIT allocations.
- * @jit_current_allocations_per_bin: Current number of in-flight JIT allocations per bin
- * @jit_version: version number indicating whether userspace is using
- * old or new version of interface for JIT allocations
- * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
- * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT
- * @jit_active_head: List containing the JIT allocations which are in use.
- * @jit_pool_head: List containing the JIT allocations which have been
- * freed up by userspace and so not being used by them.
- * Driver caches them to quickly fulfill requests for new
- * JIT allocations.
They are released in case of memory - * pressure as they are put on the @evict_list when they - * are freed up by userspace. - * @jit_destroy_head: List containing the JIT allocations which were moved to it - * from @jit_pool_head, in the shrinker callback, after freeing - * their backing physical pages. - * @jit_evict_lock: Lock used for operations done on JIT allocations and also - * for accessing @evict_list. - * @jit_work: Work item queued to defer the freeing of memory region when - * JIT allocation is moved to @jit_destroy_head. - * @jit_atoms_head: A list of the JIT soft-jobs, both alloc & free, in submission - * order, protected by kbase_jd_context.lock. - * @jit_pending_alloc: A list of JIT alloc soft-jobs for which allocation will be - * reattempted after the impending free of other active JIT - * allocations. - * @ext_res_meta_head: A list of sticky external resources which were requested to - * be mapped on GPU side, through a softjob atom of type - * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. - * @drain_pending: Used to record that a flush/invalidate of the GPU caches was - * requested from atomic context, so that the next flush request - * can wait for the flush of GPU writes. - * @age_count: Counter incremented on every call to jd_submit_atom, - * atom is assigned the snapshot of this counter, which - * is used to determine the atom's age when it is added to - * the runnable RB-tree. - * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) - * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by - * kbase_context.reg_lock. - * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. - * @gwt_current_list: A list of addresses for which GPU has generated write faults, - * after the last snapshot of it was sent to userspace. - * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. - * @priority: Indicates the context priority. Used along with @atoms_count - * for context scheduling, protected by hwaccess_lock. 
- * @atoms_count: Number of gpu atoms currently in use, per priority - */ -struct kbase_context { - struct file *filp; - struct kbase_device *kbdev; - struct kbase_mmu_table mmu; - - u32 id; - unsigned long api_version; - struct list_head event_list; - struct list_head event_coalesce_list; - struct mutex event_mutex; - atomic_t event_closed; - struct workqueue_struct *event_workq; - atomic_t event_count; - int event_coalesce_count; - - atomic_t flags; - - atomic_t setup_complete; - atomic_t setup_in_progress; - - struct tagged_addr aliasing_sink_page; - - spinlock_t mem_partials_lock; - struct list_head mem_partials; - - struct mutex reg_lock; - struct rb_root reg_rbtree_same; - struct rb_root reg_rbtree_custom; - struct rb_root reg_rbtree_exec; - - - unsigned long cookies; - struct kbase_va_region *pending_regions[BITS_PER_LONG]; - - wait_queue_head_t event_queue; - pid_t tgid; - pid_t pid; - - struct kbase_jd_context jctx; - atomic_t used_pages; - atomic_t nonmapped_pages; - unsigned long permanent_mapped_pages; - - struct kbase_mem_pool mem_pool; - struct kbase_mem_pool lp_mem_pool; - - struct shrinker reclaim; - struct list_head evict_list; - - struct list_head waiting_soft_jobs; - spinlock_t waiting_soft_jobs_lock; -#ifdef CONFIG_MALI_DMA_FENCE - struct { - struct list_head waiting_resource; - struct workqueue_struct *wq; - } dma_fence; -#endif /* CONFIG_MALI_DMA_FENCE */ - - int as_nr; - - atomic_t refcount; - - - /* NOTE: - * - * Flags are in jctx.sched_info.ctx.flags - * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex - * - * All other flags must be added there */ - spinlock_t mm_update_lock; - struct mm_struct __rcu *process_mm; - u64 same_va_end; - u64 exec_va_start; - u64 gpu_va_end; - bool jit_va; - -#ifdef CONFIG_DEBUG_FS - char *mem_profile_data; - size_t mem_profile_size; - struct mutex mem_profile_lock; - struct dentry *kctx_dentry; - - unsigned int *reg_dump; - atomic_t job_fault_count; - struct list_head job_fault_resume_event_list; - -#endif /* CONFIG_DEBUG_FS */ - - struct jsctx_queue jsctx_queue - [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; - - atomic_t atoms_pulled; - atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; - int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ - KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - u32 slots_pullable; - - struct work_struct work; - - struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; - struct mutex legacy_hwcnt_lock; - - struct list_head completed_jobs; - atomic_t work_count; - - struct timer_list soft_job_timeout; - - struct kbase_va_region *jit_alloc[256]; - u8 jit_max_allocations; - u8 jit_current_allocations; - u8 jit_current_allocations_per_bin[256]; - u8 jit_version; - struct list_head jit_active_head; - struct list_head jit_pool_head; - struct list_head jit_destroy_head; - struct mutex jit_evict_lock; - struct work_struct jit_work; - - struct list_head jit_atoms_head; - struct list_head jit_pending_alloc; - - struct list_head ext_res_meta_head; - - atomic_t drain_pending; - - u32 age_count; - - u8 trim_level; - -#ifdef CONFIG_MALI_CINSTR_GWT - bool gwt_enabled; - - bool gwt_was_enabled; - - struct list_head gwt_current_list; - - struct list_head gwt_snapshot_list; -#endif - - int priority; - s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; -}; - -#ifdef CONFIG_MALI_CINSTR_GWT -/** - * struct kbasep_gwt_list_element - Structure used to collect GPU - * write faults. - * @link: List head for adding write faults. 
- * @region: Details of the region where we have the - * faulting page address. - * @page_addr: Page address where GPU write fault occurred. - * @num_pages: The number of pages modified. - * - * Using this structure all GPU write faults are stored in a list. - */ -struct kbasep_gwt_list_element { - struct list_head link; - struct kbase_va_region *region; - u64 page_addr; - u64 num_pages; -}; - -#endif - -/** - * struct kbase_ctx_ext_res_meta - Structure which binds an external resource - * to a @kbase_context. - * @ext_res_node: List head for adding the metadata to a - * @kbase_context. - * @alloc: The physical memory allocation structure - * which is mapped. - * @gpu_addr: The GPU virtual address the resource is - * mapped to. - * - * External resources can be mapped into multiple contexts as well as the same - * context multiple times. - * As kbase_va_region itself isn't refcounted we can't attach our extra - * information to it as it could be removed under our feet leaving external - * resources pinned. - * This metadata structure binds a single external resource to a single - * context, ensuring that per context mapping is tracked separately so it can - * be overridden when needed and abuses by the application (freeing the resource - * multiple times) don't effect the refcount of the physical allocation. - */ -struct kbase_ctx_ext_res_meta { - struct list_head ext_res_node; - struct kbase_mem_phy_alloc *alloc; - u64 gpu_addr; -}; - -enum kbase_reg_access_type { - REG_READ, - REG_WRITE -}; - -enum kbase_share_attr_bits { - /* (1ULL << 8) bit is reserved */ - SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ - SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ -}; - -/** - * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. - * @kbdev: kbase device - * - * Return: true if the device access are coherent, false if not. - */ -static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) -{ - if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || - (kbdev->system_coherency == COHERENCY_ACE)) - return true; - - return false; -} - -/* Conversion helpers for setting up high resolution timers */ -#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) -#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) - -/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ -#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 -/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ -#define KBASE_AS_INACTIVE_MAX_LOOPS 100000 - -/* Maximum number of times a job can be replayed */ -#define BASEP_JD_REPLAY_LIMIT 15 - -/* JobDescriptorHeader - taken from the architecture specifications, the layout - * is currently identical for all GPU archs. 
*/ -struct job_descriptor_header { - u32 exception_status; - u32 first_incomplete_task; - u64 fault_pointer; - u8 job_descriptor_size : 1; - u8 job_type : 7; - u8 job_barrier : 1; - u8 _reserved_01 : 1; - u8 _reserved_1 : 1; - u8 _reserved_02 : 1; - u8 _reserved_03 : 1; - u8 _reserved_2 : 1; - u8 _reserved_04 : 1; - u8 _reserved_05 : 1; - u16 job_index; - u16 job_dependency_index_1; - u16 job_dependency_index_2; - union { - u64 _64; - u32 _32; - } next_job; -}; - -#endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_device.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_device.c deleted file mode 100755 index 530bb45c8ec0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_device.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel device APIs - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/* NOTE: Magic - 0x45435254 (TRCE in ASCII). - * Supports tracing feature provided in the base module. - * Please keep it in sync with the value of base module. 
- */ -#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 - -#if KBASE_TRACE_ENABLE -static const char *kbasep_trace_code_string[] = { - /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE - * THIS MUST BE USED AT THE START OF THE ARRAY */ -#define KBASE_TRACE_CODE_MAKE_CODE(X) # X -#include "mali_kbase_trace_defs.h" -#undef KBASE_TRACE_CODE_MAKE_CODE -}; -#endif - -#define DEBUG_MESSAGE_SIZE 256 - -static int kbasep_trace_init(struct kbase_device *kbdev); -static void kbasep_trace_term(struct kbase_device *kbdev); -static void kbasep_trace_hook_wrapper(void *param); - -struct kbase_device *kbase_device_alloc(void) -{ - return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); -} - -static int kbase_device_as_init(struct kbase_device *kbdev, int i) -{ - const char format[] = "mali_mmu%d"; - char name[sizeof(format)]; - const char poke_format[] = "mali_mmu%d_poker"; - char poke_name[sizeof(poke_format)]; - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) - snprintf(poke_name, sizeof(poke_name), poke_format, i); - - snprintf(name, sizeof(name), format, i); - - kbdev->as[i].number = i; - kbdev->as[i].bf_data.addr = 0ULL; - kbdev->as[i].pf_data.addr = 0ULL; - - kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1); - if (!kbdev->as[i].pf_wq) - return -EINVAL; - - INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); - INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { - struct hrtimer *poke_timer = &kbdev->as[i].poke_timer; - struct work_struct *poke_work = &kbdev->as[i].poke_work; - - kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1); - if (!kbdev->as[i].poke_wq) { - destroy_workqueue(kbdev->as[i].pf_wq); - return -EINVAL; - } - INIT_WORK(poke_work, kbasep_as_do_poke); - - hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - - poke_timer->function = kbasep_as_poke_timer_callback; - - kbdev->as[i].poke_refcount = 0; - kbdev->as[i].poke_state = 0u; - } - - return 0; -} - -static void kbase_device_as_term(struct kbase_device *kbdev, int i) -{ - destroy_workqueue(kbdev->as[i].pf_wq); - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) - destroy_workqueue(kbdev->as[i].poke_wq); -} - -static int kbase_device_all_as_init(struct kbase_device *kbdev) -{ - int i, err; - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - err = kbase_device_as_init(kbdev, i); - if (err) - goto free_workqs; - } - - return 0; - -free_workqs: - for (; i > 0; i--) - kbase_device_as_term(kbdev, i); - - return err; -} - -static void kbase_device_all_as_term(struct kbase_device *kbdev) -{ - int i; - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) - kbase_device_as_term(kbdev, i); -} - -int kbase_device_init(struct kbase_device * const kbdev) -{ - int err; -#ifdef CONFIG_ARM64 - struct device_node *np = NULL; -#endif /* CONFIG_ARM64 */ - - spin_lock_init(&kbdev->mmu_mask_change); - mutex_init(&kbdev->mmu_hw_mutex); -#ifdef CONFIG_ARM64 - kbdev->cci_snoop_enabled = false; - np = kbdev->dev->of_node; - if (np != NULL) { - if (of_property_read_u32(np, "snoop_enable_smc", - &kbdev->snoop_enable_smc)) - kbdev->snoop_enable_smc = 0; - if (of_property_read_u32(np, "snoop_disable_smc", - &kbdev->snoop_disable_smc)) - kbdev->snoop_disable_smc = 0; - /* Either both or none of the calls should be provided. 
*/ - if (!((kbdev->snoop_disable_smc == 0 - && kbdev->snoop_enable_smc == 0) - || (kbdev->snoop_disable_smc != 0 - && kbdev->snoop_enable_smc != 0))) { - WARN_ON(1); - err = -EINVAL; - goto fail; - } - } -#endif /* CONFIG_ARM64 */ - /* Get the list of workarounds for issues on the current HW - * (identified by the GPU_ID register) - */ - err = kbase_hw_set_issues_mask(kbdev); - if (err) - goto fail; - - /* Set the list of features available on the current HW - * (identified by the GPU_ID register) - */ - kbase_hw_set_features_mask(kbdev); - - kbase_gpuprops_set_features(kbdev); - - /* On Linux 4.0+, dma coherency is determined from device tree */ -#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) - set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); -#endif - - /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our - * device structure was created by device-tree - */ - if (!kbdev->dev->dma_mask) - kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; - - err = dma_set_mask(kbdev->dev, - DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); - if (err) - goto dma_set_mask_failed; - - err = dma_set_coherent_mask(kbdev->dev, - DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); - if (err) - goto dma_set_mask_failed; - - kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; - - err = kbase_device_all_as_init(kbdev); - if (err) - goto as_init_failed; - - spin_lock_init(&kbdev->hwcnt.lock); - - err = kbasep_trace_init(kbdev); - if (err) - goto term_as; - - init_waitqueue_head(&kbdev->cache_clean_wait); - - kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); - - atomic_set(&kbdev->ctx_num, 0); - - err = kbase_instr_backend_init(kbdev); - if (err) - goto term_trace; - - kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; - - kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - else - kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); - - return 0; -term_trace: - kbasep_trace_term(kbdev); -term_as: - kbase_device_all_as_term(kbdev); -as_init_failed: -dma_set_mask_failed: -fail: - return err; -} - -void kbase_device_term(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev); - -#if KBASE_TRACE_ENABLE - kbase_debug_assert_register_hook(NULL, NULL); -#endif - - kbase_instr_backend_term(kbdev); - - kbasep_trace_term(kbdev); - - kbase_device_all_as_term(kbdev); -} - -void kbase_device_free(struct kbase_device *kbdev) -{ - kfree(kbdev); -} - -/* - * Device trace functions - */ -#if KBASE_TRACE_ENABLE - -static int kbasep_trace_init(struct kbase_device *kbdev) -{ - struct kbase_trace *rbuf; - - rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); - - if (!rbuf) - return -EINVAL; - - kbdev->trace_rbuf = rbuf; - spin_lock_init(&kbdev->trace_lock); - return 0; -} - -static void kbasep_trace_term(struct kbase_device *kbdev) -{ - kfree(kbdev->trace_rbuf); -} - -static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len) -{ - s32 written = 0; - - /* Initial part of message */ - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0); - - if (trace_msg->katom) - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, 
trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0); - - written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0); - - /* NOTE: Could add function callbacks to handle different message types */ - /* Jobslot present */ - if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0); - - written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); - - /* Refcount present */ - if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT) - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0); - - written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); - - /* Rest of message */ - written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0); -} - -static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg) -{ - char buffer[DEBUG_MESSAGE_SIZE]; - - kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); - dev_dbg(kbdev->dev, "%s", buffer); -} - -void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) -{ - unsigned long irqflags; - struct kbase_trace *trace_msg; - - spin_lock_irqsave(&kbdev->trace_lock, irqflags); - - trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in]; - - /* Fill the message */ - trace_msg->thread_id = task_pid_nr(current); - trace_msg->cpu = task_cpu(current); - - getnstimeofday(&trace_msg->timestamp); - - trace_msg->code = code; - trace_msg->ctx = ctx; - - if (NULL == katom) { - trace_msg->katom = false; - } else { - trace_msg->katom = true; - trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom); - trace_msg->atom_udata[0] = katom->udata.blob[0]; - trace_msg->atom_udata[1] = katom->udata.blob[1]; - } - - trace_msg->gpu_addr = gpu_addr; - trace_msg->jobslot = jobslot; - trace_msg->refcount = MIN((unsigned int)refcount, 0xFF); - trace_msg->info_val = info_val; - trace_msg->flags = flags; - - /* Update the ringbuffer indices */ - kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK; - if (kbdev->trace_next_in == kbdev->trace_first_out) - kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK; - - /* Done */ - - spin_unlock_irqrestore(&kbdev->trace_lock, irqflags); -} - -void kbasep_trace_clear(struct kbase_device *kbdev) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->trace_lock, flags); - kbdev->trace_first_out = kbdev->trace_next_in; - spin_unlock_irqrestore(&kbdev->trace_lock, flags); -} - -void kbasep_trace_dump(struct kbase_device *kbdev) -{ - unsigned long flags; - u32 start; - u32 end; - - dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val"); - spin_lock_irqsave(&kbdev->trace_lock, flags); - start = kbdev->trace_first_out; - end = kbdev->trace_next_in; - - while (start != end) { - struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start]; - - kbasep_trace_dump_msg(kbdev, trace_msg); - - start = (start + 1) & KBASE_TRACE_MASK; - } - dev_dbg(kbdev->dev, "TRACE_END"); - - spin_unlock_irqrestore(&kbdev->trace_lock, flags); - - KBASE_TRACE_CLEAR(kbdev); -} - -static void kbasep_trace_hook_wrapper(void *param) -{ - struct kbase_device *kbdev = (struct kbase_device *)param; - - kbasep_trace_dump(kbdev); -} - -#ifdef CONFIG_DEBUG_FS -struct trace_seq_state { - struct 
kbase_trace trace_buf[KBASE_TRACE_SIZE]; - u32 start; - u32 end; -}; - -static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos) -{ - struct trace_seq_state *state = s->private; - int i; - - if (*pos > KBASE_TRACE_SIZE) - return NULL; - i = state->start + *pos; - if ((state->end >= state->start && i >= state->end) || - i >= state->end + KBASE_TRACE_SIZE) - return NULL; - - i &= KBASE_TRACE_MASK; - - return &state->trace_buf[i]; -} - -static void kbasep_trace_seq_stop(struct seq_file *s, void *data) -{ -} - -static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos) -{ - struct trace_seq_state *state = s->private; - int i; - - (*pos)++; - - i = (state->start + *pos) & KBASE_TRACE_MASK; - if (i == state->end) - return NULL; - - return &state->trace_buf[i]; -} - -static int kbasep_trace_seq_show(struct seq_file *s, void *data) -{ - struct kbase_trace *trace_msg = data; - char buffer[DEBUG_MESSAGE_SIZE]; - - kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); - seq_printf(s, "%s\n", buffer); - return 0; -} - -static const struct seq_operations kbasep_trace_seq_ops = { - .start = kbasep_trace_seq_start, - .next = kbasep_trace_seq_next, - .stop = kbasep_trace_seq_stop, - .show = kbasep_trace_seq_show, -}; - -static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file) -{ - struct kbase_device *kbdev = inode->i_private; - unsigned long flags; - - struct trace_seq_state *state; - - state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state)); - if (!state) - return -ENOMEM; - - spin_lock_irqsave(&kbdev->trace_lock, flags); - state->start = kbdev->trace_first_out; - state->end = kbdev->trace_next_in; - memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf)); - spin_unlock_irqrestore(&kbdev->trace_lock, flags); - - return 0; -} - -static const struct file_operations kbasep_trace_debugfs_fops = { - .open = kbasep_trace_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -void kbasep_trace_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("mali_trace", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_trace_debugfs_fops); -} - -#else -void kbasep_trace_debugfs_init(struct kbase_device *kbdev) -{ -} -#endif /* CONFIG_DEBUG_FS */ - -#else /* KBASE_TRACE_ENABLE */ -static int kbasep_trace_init(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); - return 0; -} - -static void kbasep_trace_term(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -static void kbasep_trace_hook_wrapper(void *param) -{ - CSTD_UNUSED(param); -} - -void kbasep_trace_dump(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} -#endif /* KBASE_TRACE_ENABLE */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_disjoint_events.c deleted file mode 100755 index 68eb4ed0715d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_disjoint_events.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Base kernel disjoint events helper functions - */ - -#include - -void kbase_disjoint_init(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - atomic_set(&kbdev->disjoint_event.count, 0); - atomic_set(&kbdev->disjoint_event.state, 0); -} - -/* increment the disjoint event count */ -void kbase_disjoint_event(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - atomic_inc(&kbdev->disjoint_event.count); -} - -/* increment the state and the event counter */ -void kbase_disjoint_state_up(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - atomic_inc(&kbdev->disjoint_event.state); - - kbase_disjoint_event(kbdev); -} - -/* decrement the state */ -void kbase_disjoint_state_down(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); - - kbase_disjoint_event(kbdev); - - atomic_dec(&kbdev->disjoint_event.state); -} - -/* increments the count only if the state is > 0 */ -void kbase_disjoint_event_potential(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - if (atomic_read(&kbdev->disjoint_event.state)) - kbase_disjoint_event(kbdev); -} - -u32 kbase_disjoint_event_get(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - - return atomic_read(&kbdev->disjoint_event.count); -} -KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_dma_fence.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_dma_fence.c deleted file mode 100755 index 6a95900b3b76..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_dma_fence.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as - * it will be set there. 
- */ -#include "mali_kbase_dma_fence.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static void -kbase_dma_fence_work(struct work_struct *pwork); - -static void -kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource); -} - -static void -kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom) -{ - list_del(&katom->queue); -} - -static int -kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, - struct ww_acquire_ctx *ctx) -{ - struct reservation_object *content_res = NULL; - unsigned int content_res_idx = 0; - unsigned int r; - int err = 0; - - ww_acquire_init(ctx, &reservation_ww_class); - -retry: - for (r = 0; r < info->dma_fence_resv_count; r++) { - if (info->resv_objs[r] == content_res) { - content_res = NULL; - continue; - } - - err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx); - if (err) - goto error; - } - - ww_acquire_done(ctx); - return err; - -error: - content_res_idx = r; - - /* Unlock the locked one ones */ - while (r--) - ww_mutex_unlock(&info->resv_objs[r]->lock); - - if (content_res) - ww_mutex_unlock(&content_res->lock); - - /* If we deadlock try with lock_slow and retry */ - if (err == -EDEADLK) { - content_res = info->resv_objs[content_res_idx]; - ww_mutex_lock_slow(&content_res->lock, ctx); - goto retry; - } - - /* If we are here the function failed */ - ww_acquire_fini(ctx); - return err; -} - -static void -kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, - struct ww_acquire_ctx *ctx) -{ - unsigned int r; - - for (r = 0; r < info->dma_fence_resv_count; r++) - ww_mutex_unlock(&info->resv_objs[r]->lock); - ww_acquire_fini(ctx); -} - -/** - * kbase_dma_fence_queue_work() - Queue work to handle @katom - * @katom: Pointer to atom for which to queue work - * - * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and - * submit the atom. - */ -static void -kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - bool ret; - - INIT_WORK(&katom->work, kbase_dma_fence_work); - ret = queue_work(kctx->dma_fence.wq, &katom->work); - /* Warn if work was already queued, that should not happen. */ - WARN_ON(!ret); -} - -/** - * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom - * @katom: Katom to cancel - * - * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. - */ -static void -kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&katom->kctx->jctx.lock); - - /* Cancel callbacks and clean up. */ - kbase_fence_free_callbacks(katom); - - /* Mark the atom as handled in case all fences signaled just before - * canceling the callbacks and the worker was queued. - */ - kbase_fence_dep_count_set(katom, -1); - - /* Prevent job_done_nolock from being called twice on an atom when - * there is a race between job completion and cancellation. 
- */ - - if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { - /* Wait was cancelled - zap the atom */ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(katom->kctx->kbdev); - } -} - -/** - * kbase_dma_fence_work() - Worker thread called when a fence is signaled - * @pwork: work_struct containing a pointer to a katom - * - * This function will clean and mark all dependencies as satisfied - */ -static void -kbase_dma_fence_work(struct work_struct *pwork) -{ - struct kbase_jd_atom *katom; - struct kbase_jd_context *ctx; - - katom = container_of(pwork, struct kbase_jd_atom, work); - ctx = &katom->kctx->jctx; - - mutex_lock(&ctx->lock); - if (kbase_fence_dep_count_read(katom) != 0) - goto out; - - kbase_fence_dep_count_set(katom, -1); - - /* Remove atom from list of dma-fence waiting atoms. */ - kbase_dma_fence_waiters_remove(katom); - /* Cleanup callbacks. */ - kbase_fence_free_callbacks(katom); - /* - * Queue atom on GPU, unless it has already completed due to a failing - * dependency. Run jd_done_nolock() on the katom if it is completed. - */ - if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) - jd_done_nolock(katom, NULL); - else - kbase_jd_dep_clear_locked(katom); - -out: - mutex_unlock(&ctx->lock); -} - -static void -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) -#else -kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) -#endif -{ - struct kbase_fence_cb *kcb = container_of(cb, - struct kbase_fence_cb, - fence_cb); - struct kbase_jd_atom *katom = kcb->katom; - - /* If the atom is zapped dep_count will be forced to a negative number - * preventing this callback from ever scheduling work. Which in turn - * would reschedule the atom. - */ - - if (kbase_fence_dep_count_dec_and_test(katom)) - kbase_dma_fence_queue_work(katom); -} - -static int -kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, - struct reservation_object *resv, - bool exclusive) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *excl_fence = NULL; - struct fence **shared_fences = NULL; -#else - struct dma_fence *excl_fence = NULL; - struct dma_fence **shared_fences = NULL; -#endif - unsigned int shared_count = 0; - int err, i; - - err = reservation_object_get_fences_rcu(resv, - &excl_fence, - &shared_count, - &shared_fences); - if (err) - return err; - - if (excl_fence) { - err = kbase_fence_add_callback(katom, - excl_fence, - kbase_dma_fence_cb); - - /* Release our reference, taken by reservation_object_get_fences_rcu(), - * to the fence. We have set up our callback (if that was possible), - * and it's the fence's owner is responsible for singling the fence - * before allowing it to disappear. - */ - dma_fence_put(excl_fence); - - if (err) - goto out; - } - - if (exclusive) { - for (i = 0; i < shared_count; i++) { - err = kbase_fence_add_callback(katom, - shared_fences[i], - kbase_dma_fence_cb); - if (err) - goto out; - } - } - - /* Release all our references to the shared fences, taken by - * reservation_object_get_fences_rcu(). We have set up our callback (if - * that was possible), and it's the fence's owner is responsible for - * signaling the fence before allowing it to disappear. - */ -out: - for (i = 0; i < shared_count; i++) - dma_fence_put(shared_fences[i]); - kfree(shared_fences); - - if (err) { - /* - * On error, cancel and clean up all callbacks that was set up - * before the error. 
- */ - kbase_fence_free_callbacks(katom); - } - - return err; -} - -void kbase_dma_fence_add_reservation(struct reservation_object *resv, - struct kbase_dma_fence_resv_info *info, - bool exclusive) -{ - unsigned int i; - - for (i = 0; i < info->dma_fence_resv_count; i++) { - /* Duplicate resource, ignore */ - if (info->resv_objs[i] == resv) - return; - } - - info->resv_objs[info->dma_fence_resv_count] = resv; - if (exclusive) - set_bit(info->dma_fence_resv_count, - info->dma_fence_excl_bitmap); - (info->dma_fence_resv_count)++; -} - -int kbase_dma_fence_wait(struct kbase_jd_atom *katom, - struct kbase_dma_fence_resv_info *info) -{ - int err, i; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - struct ww_acquire_ctx ww_ctx; - - lockdep_assert_held(&katom->kctx->jctx.lock); - - fence = kbase_fence_out_new(katom); - if (!fence) { - err = -ENOMEM; - dev_err(katom->kctx->kbdev->dev, - "Error %d creating fence.\n", err); - return err; - } - - kbase_fence_dep_count_set(katom, 1); - - err = kbase_dma_fence_lock_reservations(info, &ww_ctx); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d locking reservations.\n", err); - kbase_fence_dep_count_set(katom, -1); - kbase_fence_out_remove(katom); - return err; - } - - for (i = 0; i < info->dma_fence_resv_count; i++) { - struct reservation_object *obj = info->resv_objs[i]; - - if (!test_bit(i, info->dma_fence_excl_bitmap)) { - err = reservation_object_reserve_shared(obj); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d reserving space for shared fence.\n", err); - goto end; - } - - err = kbase_dma_fence_add_reservation_callback(katom, obj, false); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d adding reservation to callback.\n", err); - goto end; - } - - reservation_object_add_shared_fence(obj, fence); - } else { - err = kbase_dma_fence_add_reservation_callback(katom, obj, true); - if (err) { - dev_err(katom->kctx->kbdev->dev, - "Error %d adding reservation to callback.\n", err); - goto end; - } - - reservation_object_add_excl_fence(obj, fence); - } - } - -end: - kbase_dma_fence_unlock_reservations(info, &ww_ctx); - - if (likely(!err)) { - /* Test if the callbacks are already triggered */ - if (kbase_fence_dep_count_dec_and_test(katom)) { - kbase_fence_dep_count_set(katom, -1); - kbase_fence_free_callbacks(katom); - } else { - /* Add katom to the list of dma-buf fence waiting atoms - * only if it is still waiting. - */ - kbase_dma_fence_waiters_add(katom); - } - } else { - /* There was an error, cancel callbacks, set dep_count to -1 to - * indicate that the atom has been handled (the caller will - * kill it for us), signal the fence, free callbacks and the - * fence. - */ - kbase_fence_free_callbacks(katom); - kbase_fence_dep_count_set(katom, -1); - kbase_dma_fence_signal(katom); - } - - return err; -} - -void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) -{ - struct list_head *list = &kctx->dma_fence.waiting_resource; - - while (!list_empty(list)) { - struct kbase_jd_atom *katom; - - katom = list_first_entry(list, struct kbase_jd_atom, queue); - kbase_dma_fence_waiters_remove(katom); - kbase_dma_fence_cancel_atom(katom); - } -} - -void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) -{ - /* Cancel callbacks and clean up. 
*/ - if (kbase_fence_free_callbacks(katom)) - kbase_dma_fence_queue_work(katom); -} - -void kbase_dma_fence_signal(struct kbase_jd_atom *katom) -{ - if (!katom->dma_fence.fence) - return; - - /* Signal the atom's fence. */ - dma_fence_signal(katom->dma_fence.fence); - - kbase_fence_out_remove(katom); - - kbase_fence_free_callbacks(katom); -} - -void kbase_dma_fence_term(struct kbase_context *kctx) -{ - destroy_workqueue(kctx->dma_fence.wq); - kctx->dma_fence.wq = NULL; -} - -int kbase_dma_fence_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource); - - kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d", - WQ_UNBOUND, 1, kctx->pid); - if (!kctx->dma_fence.wq) - return -ENOMEM; - - return 0; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_dma_fence.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_dma_fence.h deleted file mode 100755 index 2a4d6fcfaaaf..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_dma_fence.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_DMA_FENCE_H_ -#define _KBASE_DMA_FENCE_H_ - -#ifdef CONFIG_MALI_DMA_FENCE - -#include -#include -#include - - -/* Forward declaration from mali_kbase_defs.h */ -struct kbase_jd_atom; -struct kbase_context; - -/** - * struct kbase_dma_fence_resv_info - Structure with list of reservation objects - * @resv_objs: Array of reservation objects to attach the - * new fence to. - * @dma_fence_resv_count: Number of reservation objects in the array. - * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive. - * - * This is used by some functions to pass around a collection of data about - * reservation objects. - */ -struct kbase_dma_fence_resv_info { - struct reservation_object **resv_objs; - unsigned int dma_fence_resv_count; - unsigned long *dma_fence_excl_bitmap; -}; - -/** - * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs - * @resv: Reservation object to add to the array. - * @info: Pointer to struct with current reservation info - * @exclusive: Boolean indicating if exclusive access is needed - * - * The function adds a new reservation_object to an existing array of - * reservation_objects. At the same time keeps track of which objects require - * exclusive access in dma_fence_excl_bitmap. - */ -void kbase_dma_fence_add_reservation(struct reservation_object *resv, - struct kbase_dma_fence_resv_info *info, - bool exclusive); - -/** - * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs - * @katom: Katom with the external dependency. 
- * @info: Pointer to struct with current reservation info - * - * Return: An error code or 0 if succeeds - */ -int kbase_dma_fence_wait(struct kbase_jd_atom *katom, - struct kbase_dma_fence_resv_info *info); - -/** - * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx - * @kctx: Pointer to kbase context - * - * This function will cancel and clean up all katoms on @kctx that is waiting - * on dma-buf fences. - * - * Locking: jctx.lock needs to be held when calling this function. - */ -void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx); - -/** - * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom - * @katom: Pointer to katom whose callbacks are to be canceled - * - * This function cancels all dma-buf fence callbacks on @katom, but does not - * cancel the katom itself. - * - * The caller is responsible for ensuring that jd_done_nolock is called on - * @katom. - * - * Locking: jctx.lock must be held when calling this function. - */ -void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom); - -/** - * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait - * @katom: Pointer to katom to signal and clean up - * - * This function will signal the @katom's fence, if it has one, and clean up - * the callback data from the katom's wait on earlier fences. - * - * Locking: jctx.lock must be held while calling this function. - */ -void kbase_dma_fence_signal(struct kbase_jd_atom *katom); - -/** - * kbase_dma_fence_term() - Terminate Mali dma-fence context - * @kctx: kbase context to terminate - */ -void kbase_dma_fence_term(struct kbase_context *kctx); - -/** - * kbase_dma_fence_init() - Initialize Mali dma-fence context - * @kctx: kbase context to initialize - */ -int kbase_dma_fence_init(struct kbase_context *kctx); - - -#else /* CONFIG_MALI_DMA_FENCE */ -/* Dummy functions for when dma-buf fence isn't enabled. */ - -static inline int kbase_dma_fence_init(struct kbase_context *kctx) -{ - return 0; -} - -static inline void kbase_dma_fence_term(struct kbase_context *kctx) {} -#endif /* CONFIG_MALI_DMA_FENCE */ -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_event.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_event.c deleted file mode 100755 index 3c9cef364134..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_event.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2016,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include -#include -#include - -static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - struct base_jd_udata data; - - lockdep_assert_held(&kctx->jctx.lock); - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(katom != NULL); - KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); - - data = katom->udata; - - KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx); - KBASE_TLSTREAM_TL_DEL_ATOM(katom); - - katom->status = KBASE_JD_ATOM_STATE_UNUSED; - - wake_up(&katom->completed); - - return data; -} - -int kbase_event_pending(struct kbase_context *ctx) -{ - KBASE_DEBUG_ASSERT(ctx); - - return (atomic_read(&ctx->event_count) != 0) || - (atomic_read(&ctx->event_closed) != 0); -} - -KBASE_EXPORT_TEST_API(kbase_event_pending); - -int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) -{ - struct kbase_jd_atom *atom; - - KBASE_DEBUG_ASSERT(ctx); - - mutex_lock(&ctx->event_mutex); - - if (list_empty(&ctx->event_list)) { - if (!atomic_read(&ctx->event_closed)) { - mutex_unlock(&ctx->event_mutex); - return -1; - } - - /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ - mutex_unlock(&ctx->event_mutex); - uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; - memset(&uevent->udata, 0, sizeof(uevent->udata)); - dev_dbg(ctx->kbdev->dev, - "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", - BASE_JD_EVENT_DRV_TERMINATED); - return 0; - } - - /* normal event processing */ - atomic_dec(&ctx->event_count); - atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); - list_del(ctx->event_list.next); - - mutex_unlock(&ctx->event_mutex); - - dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); - uevent->event_code = atom->event_code; - uevent->atom_number = (atom - ctx->jctx.atoms); - - if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) - kbase_jd_free_external_resources(atom); - - mutex_lock(&ctx->jctx.lock); - uevent->udata = kbase_event_process(ctx, atom); - mutex_unlock(&ctx->jctx.lock); - - return 0; -} - -KBASE_EXPORT_TEST_API(kbase_event_dequeue); - -/** - * kbase_event_process_noreport_worker - Worker for processing atoms that do not - * return an event but do have external - * resources - * @data: Work structure - */ -static void kbase_event_process_noreport_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); - struct kbase_context *kctx = katom->kctx; - - if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) - kbase_jd_free_external_resources(katom); - - mutex_lock(&kctx->jctx.lock); - kbase_event_process(kctx, katom); - mutex_unlock(&kctx->jctx.lock); -} - -/** - * kbase_event_process_noreport - Process atoms that do not return an event - * @kctx: Context pointer - * @katom: Atom to be processed - * - * Atoms that do not have external resources will be processed immediately. - * Atoms that do have external resources will be processed on a workqueue, in - * order to avoid locking issues. 
- */ -static void kbase_event_process_noreport(struct kbase_context *kctx, - struct kbase_jd_atom *katom) -{ - if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - INIT_WORK(&katom->work, kbase_event_process_noreport_worker); - queue_work(kctx->event_workq, &katom->work); - } else { - kbase_event_process(kctx, katom); - } -} - -/** - * kbase_event_coalesce - Move pending events to the main event list - * @kctx: Context pointer - * - * kctx->event_list and kctx->event_coalesce_count must be protected - * by a lock unless this is the last thread using them - * (and we're about to terminate the lock). - * - * Return: The number of pending events moved to the main event list - */ -static int kbase_event_coalesce(struct kbase_context *kctx) -{ - const int event_count = kctx->event_coalesce_count; - - /* Join the list of pending events onto the tail of the main list - and reset it */ - list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); - kctx->event_coalesce_count = 0; - - /* Return the number of events moved */ - return event_count; -} - -void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) -{ - if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { - if (atom->event_code == BASE_JD_EVENT_DONE) { - /* Don't report the event */ - kbase_event_process_noreport(ctx, atom); - return; - } - } - - if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { - /* Don't report the event */ - kbase_event_process_noreport(ctx, atom); - return; - } - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED); - if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { - /* Don't report the event until other event(s) have completed */ - mutex_lock(&ctx->event_mutex); - list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); - ++ctx->event_coalesce_count; - mutex_unlock(&ctx->event_mutex); - } else { - /* Report the event and any pending events now */ - int event_count = 1; - - mutex_lock(&ctx->event_mutex); - event_count += kbase_event_coalesce(ctx); - list_add_tail(&atom->dep_item[0], &ctx->event_list); - atomic_add(event_count, &ctx->event_count); - mutex_unlock(&ctx->event_mutex); - - kbase_event_wakeup(ctx); - } -} -KBASE_EXPORT_TEST_API(kbase_event_post); - -void kbase_event_close(struct kbase_context *kctx) -{ - mutex_lock(&kctx->event_mutex); - atomic_set(&kctx->event_closed, true); - mutex_unlock(&kctx->event_mutex); - kbase_event_wakeup(kctx); -} - -int kbase_event_init(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(kctx); - - INIT_LIST_HEAD(&kctx->event_list); - INIT_LIST_HEAD(&kctx->event_coalesce_list); - mutex_init(&kctx->event_mutex); - atomic_set(&kctx->event_count, 0); - kctx->event_coalesce_count = 0; - atomic_set(&kctx->event_closed, false); - kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); - - if (NULL == kctx->event_workq) - return -EINVAL; - - return 0; -} - -KBASE_EXPORT_TEST_API(kbase_event_init); - -void kbase_event_cleanup(struct kbase_context *kctx) -{ - int event_count; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(kctx->event_workq); - - flush_workqueue(kctx->event_workq); - destroy_workqueue(kctx->event_workq); - - /* We use kbase_event_dequeue to remove the remaining events as that - * deals with all the cleanup needed for the atoms. 
- * - * Note: use of kctx->event_list without a lock is safe because this must be the last - * thread using it (because we're about to terminate the lock) - */ - event_count = kbase_event_coalesce(kctx); - atomic_add(event_count, &kctx->event_count); - - while (!list_empty(&kctx->event_list)) { - struct base_jd_event_v2 event; - - kbase_event_dequeue(kctx, &event); - } -} - -KBASE_EXPORT_TEST_API(kbase_event_cleanup); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence.c deleted file mode 100755 index 3272836efad8..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include -#include -#include - -/* Spin lock protecting all Mali fences as fence->lock. */ -static DEFINE_SPINLOCK(kbase_fence_lock); - -static const char * -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_get_driver_name(struct fence *fence) -#else -kbase_fence_get_driver_name(struct dma_fence *fence) -#endif -{ - return kbase_drv_name; -} - -static const char * -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_get_timeline_name(struct fence *fence) -#else -kbase_fence_get_timeline_name(struct dma_fence *fence) -#endif -{ - return kbase_timeline_name; -} - -static bool -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_enable_signaling(struct fence *fence) -#else -kbase_fence_enable_signaling(struct dma_fence *fence) -#endif -{ - return true; -} - -static void -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_fence_value_str(struct fence *fence, char *str, int size) -#else -kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) -#endif -{ - snprintf(str, size, "%u", fence->seqno); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, -#else -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, -#endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str -}; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -struct fence * -kbase_fence_out_new(struct kbase_jd_atom *katom) -#else -struct dma_fence * -kbase_fence_out_new(struct kbase_jd_atom *katom) -#endif -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - - WARN_ON(katom->dma_fence.fence); - - fence = kzalloc(sizeof(*fence), GFP_KERNEL); - if (!fence) - return NULL; - - dma_fence_init(fence, - &kbase_fence_ops, - 
&kbase_fence_lock, - katom->dma_fence.context, - atomic_inc_return(&katom->dma_fence.seqno)); - - katom->dma_fence.fence = fence; - - return fence; -} - -bool -kbase_fence_free_callbacks(struct kbase_jd_atom *katom) -{ - struct kbase_fence_cb *cb, *tmp; - bool res = false; - - lockdep_assert_held(&katom->kctx->jctx.lock); - - /* Clean up and free callbacks. */ - list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { - bool ret; - - /* Cancel callbacks that hasn't been called yet. */ - ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb); - if (ret) { - int ret; - - /* Fence had not signaled, clean up after - * canceling. - */ - ret = atomic_dec_return(&katom->dma_fence.dep_count); - - if (unlikely(ret == 0)) - res = true; - } - - /* - * Release the reference taken in - * kbase_fence_add_callback(). - */ - dma_fence_put(cb->fence); - list_del(&cb->node); - kfree(cb); - } - - return res; -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -int -kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct fence *fence, - fence_func_t callback) -#else -int -kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct dma_fence *fence, - dma_fence_func_t callback) -#endif -{ - int err = 0; - struct kbase_fence_cb *kbase_fence_cb; - - if (!fence) - return -EINVAL; - - kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); - if (!kbase_fence_cb) - return -ENOMEM; - - kbase_fence_cb->fence = fence; - kbase_fence_cb->katom = katom; - INIT_LIST_HEAD(&kbase_fence_cb->node); - atomic_inc(&katom->dma_fence.dep_count); - - err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, - callback); - if (err == -ENOENT) { - /* Fence signaled, get the completion result */ - err = dma_fence_get_status(fence); - - /* remap success completion to err code */ - if (err == 1) - err = 0; - - kfree(kbase_fence_cb); - atomic_dec(&katom->dma_fence.dep_count); - } else if (err) { - kfree(kbase_fence_cb); - atomic_dec(&katom->dma_fence.dep_count); - } else { - /* - * Get reference to fence that will be kept until callback gets - * cleaned up in kbase_fence_free_callbacks(). - */ - dma_fence_get(fence); - /* Add callback to katom's list of callbacks */ - list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); - } - - return err; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence.h deleted file mode 100755 index ab0db40c7fb6..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence.h +++ /dev/null @@ -1,275 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_FENCE_H_ -#define _KBASE_FENCE_H_ - -/* - * mali_kbase_fence.[hc] has common fence code used by both - * - CONFIG_MALI_DMA_FENCE - implicit DMA fences - * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel - */ - -#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - -#include -#include "mali_kbase_fence_defs.h" -#include "mali_kbase.h" - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -extern const struct fence_ops kbase_fence_ops; -#else -extern const struct dma_fence_ops kbase_fence_ops; -#endif - -/** -* struct kbase_fence_cb - Mali dma-fence callback data struct -* @fence_cb: Callback function -* @katom: Pointer to katom that is waiting on this callback -* @fence: Pointer to the fence object on which this callback is waiting -* @node: List head for linking this callback to the katom -*/ -struct kbase_fence_cb { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence_cb fence_cb; - struct fence *fence; -#else - struct dma_fence_cb fence_cb; - struct dma_fence *fence; -#endif - struct kbase_jd_atom *katom; - struct list_head node; -}; - -/** - * kbase_fence_out_new() - Creates a new output fence and puts it on the atom - * @katom: Atom to create an output fence for - * - * return: A new fence object on success, NULL on failure. - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); -#else -struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); -#endif - -#if defined(CONFIG_SYNC_FILE) -/** - * kbase_fence_fence_in_set() - Assign input fence to atom - * @katom: Atom to assign input fence to - * @fence: Input fence to assign to atom - * - * This function will take ownership of one fence reference! 
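- *
- * A minimal usage sketch, assuming the input fence came from a sync_file
- * fd looked up by a hypothetical caller (error handling omitted):
- *
- *   struct dma_fence *fence = sync_file_get_fence(fd);
- *
- *   if (fence)
- *           kbase_fence_fence_in_set(katom, fence);
- *
- * After this the atom owns that reference: the caller must not call
- * dma_fence_put() on it, and kbase_fence_in_remove() below releases it.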
- */ -#define kbase_fence_fence_in_set(katom, fence) \ - do { \ - WARN_ON((katom)->dma_fence.fence_in); \ - (katom)->dma_fence.fence_in = fence; \ - } while (0) -#endif - -/** - * kbase_fence_out_remove() - Removes the output fence from atom - * @katom: Atom to remove output fence for - * - * This will also release the reference to this fence which the atom keeps - */ -static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) -{ - if (katom->dma_fence.fence) { - dma_fence_put(katom->dma_fence.fence); - katom->dma_fence.fence = NULL; - } -} - -#if defined(CONFIG_SYNC_FILE) -/** - * kbase_fence_out_remove() - Removes the input fence from atom - * @katom: Atom to remove input fence for - * - * This will also release the reference to this fence which the atom keeps - */ -static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) -{ - if (katom->dma_fence.fence_in) { - dma_fence_put(katom->dma_fence.fence_in); - katom->dma_fence.fence_in = NULL; - } -} -#endif - -/** - * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us - * @katom: Atom to check output fence for - * - * Return: true if fence exists and is valid, otherwise false - */ -static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) -{ - return katom->dma_fence.fence && - katom->dma_fence.fence->ops == &kbase_fence_ops; -} - -/** - * kbase_fence_out_signal() - Signal output fence of atom - * @katom: Atom to signal output fence for - * @status: Status to signal with (0 for success, < 0 for error) - * - * Return: 0 on success, < 0 on error - */ -static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, - int status) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68)) - katom->dma_fence.fence->error = status; -#else - katom->dma_fence.fence->status = status; -#endif - return dma_fence_signal(katom->dma_fence.fence); -} - -/** - * kbase_fence_add_callback() - Add callback on @fence to block @katom - * @katom: Pointer to katom that will be blocked by @fence - * @fence: Pointer to fence on which to set up the callback - * @callback: Pointer to function to be called when fence is signaled - * - * Caller needs to hold a reference to @fence when calling this function, and - * the caller is responsible for releasing that reference. An additional - * reference to @fence will be taken when the callback was successfully set up - * and @fence needs to be kept valid until the callback has been called and - * cleanup have been done. - * - * Return: 0 on success: fence was either already signaled, or callback was - * set up. Negative error code is returned on error. - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -int kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct fence *fence, - fence_func_t callback); -#else -int kbase_fence_add_callback(struct kbase_jd_atom *katom, - struct dma_fence *fence, - dma_fence_func_t callback); -#endif - -/** - * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value - * @katom: Atom to set dep_count for - * @val: value to set dep_count to - * - * The dep_count is available to the users of this module so that they can - * synchronize completion of the wait with cancellation and adding of more - * callbacks. 
For instance, a user could do the following: - * - * dep_count set to 1 - * callback #1 added, dep_count is increased to 2 - * callback #1 happens, dep_count decremented to 1 - * since dep_count > 0, no completion is done - * callback #2 is added, dep_count is increased to 2 - * dep_count decremented to 1 - * callback #2 happens, dep_count decremented to 0 - * since dep_count now is zero, completion executes - * - * The dep_count can also be used to make sure that the completion only - * executes once. This is typically done by setting dep_count to -1 for the - * thread that takes on this responsibility. - */ -static inline void -kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val) -{ - atomic_set(&katom->dma_fence.dep_count, val); -} - -/** - * kbase_fence_dep_count_dec_and_test() - Decrements dep_count - * @katom: Atom to decrement dep_count for - * - * See @kbase_fence_dep_count_set for general description about dep_count - * - * Return: true if value was decremented to zero, otherwise false - */ -static inline bool -kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom) -{ - return atomic_dec_and_test(&katom->dma_fence.dep_count); -} - -/** - * kbase_fence_dep_count_read() - Returns the current dep_count value - * @katom: Pointer to katom - * - * See @kbase_fence_dep_count_set for general description about dep_count - * - * Return: The current dep_count value - */ -static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom) -{ - return atomic_read(&katom->dma_fence.dep_count); -} - -/** - * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom - * @katom: Pointer to katom - * - * This function will free all fence callbacks on the katom's list of - * callbacks. Callbacks that have not yet been called, because their fence - * hasn't yet signaled, will first be removed from the fence. - * - * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. - * - * Return: true if dep_count reached 0, otherwise false. - */ -bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); - -#if defined(CONFIG_SYNC_FILE) -/** - * kbase_fence_in_get() - Retrieve input fence for atom. - * @katom: Atom to get input fence from - * - * A ref will be taken for the fence, so use @kbase_fence_put() to release it - * - * Return: The fence, or NULL if there is no input fence for atom - */ -#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) -#endif - -/** - * kbase_fence_out_get() - Retrieve output fence for atom. - * @katom: Atom to get output fence from - * - * A ref will be taken for the fence, so use @kbase_fence_put() to release it - * - * Return: The fence, or NULL if there is no output fence for atom - */ -#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) - -/** - * kbase_fence_put() - Releases a reference to a fence - * @fence: Fence to release reference for. - */ -#define kbase_fence_put(fence) dma_fence_put(fence) - - -#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */ - -#endif /* _KBASE_FENCE_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence_defs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence_defs.h deleted file mode 100755 index 607a95c1b2ad..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_fence_defs.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_FENCE_DEFS_H_ -#define _KBASE_FENCE_DEFS_H_ - -/* - * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) - * This file hides the compatibility issues with this for the rest the driver - */ - -#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - -#include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - -#include - -#define dma_fence_context_alloc(a) fence_context_alloc(a) -#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) -#define dma_fence_get(a) fence_get(a) -#define dma_fence_put(a) fence_put(a) -#define dma_fence_signal(a) fence_signal(a) -#define dma_fence_is_signaled(a) fence_is_signaled(a) -#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) -#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) - -#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) -#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) -#else -#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) -#endif - -#else - -#include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) -#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ - (a)->status ?: 1 \ - : 0) -#endif - -#endif /* < 4.10.0 */ - -#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ - -#endif /* _KBASE_FENCE_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator.h deleted file mode 100755 index 4f5481721027..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* NB taken from gator */ -/* - * List of possible actions to be controlled by DS-5 Streamline. - * The following numbers are used by gator to control the frame buffer dumping - * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because - * they are unknown inside gator. 
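- *
- * For illustration, GATOR_MAKE_EVENT() below packs the action type into
- * bits 31:24 and its second argument (assumed here to be the job slot
- * index) into bits 23:16, e.g.
- *
- *   GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, 2)
- *           == (1 << 24) | (2 << 16)
- *           == 0x01020000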
- */ -#ifndef _KBASE_GATOR_H_ -#define _KBASE_GATOR_H_ - -#ifdef CONFIG_MALI_GATOR_SUPPORT -#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) -#define GATOR_JOB_SLOT_START 1 -#define GATOR_JOB_SLOT_STOP 2 -#define GATOR_JOB_SLOT_SOFT_STOPPED 3 - -void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id); -void kbase_trace_mali_pm_status(u32 event, u64 value); -void kbase_trace_mali_pm_power_off(u32 event, u64 value); -void kbase_trace_mali_pm_power_on(u32 event, u64 value); -void kbase_trace_mali_page_fault_insert_pages(int event, u32 value); -void kbase_trace_mali_mmu_as_in_use(int event); -void kbase_trace_mali_mmu_as_released(int event); -void kbase_trace_mali_total_alloc_pages_change(long long int event); - -#endif /* CONFIG_MALI_GATOR_SUPPORT */ - -#endif /* _KBASE_GATOR_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_api.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_api.c deleted file mode 100755 index 1719edf1e978..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_api.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase.h" -#include "mali_kbase_hw.h" -#include "mali_kbase_mem_linux.h" -#include "mali_kbase_gator_api.h" -#include "mali_kbase_gator_hwcnt_names.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_virtualizer.h" - -#define MALI_MAX_CORES_PER_GROUP 4 -#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 -#define MALI_COUNTERS_PER_BLOCK 64 -#define MALI_BYTES_PER_COUNTER 4 - -struct kbase_gator_hwcnt_handles { - struct kbase_device *kbdev; - struct kbase_hwcnt_virtualizer_client *hvcli; - struct kbase_hwcnt_enable_map enable_map; - struct kbase_hwcnt_dump_buffer dump_buf; - struct work_struct dump_work; - int dump_complete; - spinlock_t dump_lock; -}; - -static void dump_worker(struct work_struct *work); - -const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) -{ - const char * const *hardware_counters; - struct kbase_device *kbdev; - uint32_t product_id; - uint32_t count; - - if (!total_counters) - return NULL; - - /* Get the first device - it doesn't matter in this case */ - kbdev = kbase_find_device(-1); - if (!kbdev) - return NULL; - - product_id = kbdev->gpu_props.props.core_props.product_id; - - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (GPU_ID2_MODEL_MATCH_VALUE(product_id)) { - case GPU_ID2_PRODUCT_TMIX: - hardware_counters = hardware_counters_mali_tMIx; - count = ARRAY_SIZE(hardware_counters_mali_tMIx); - break; - case GPU_ID2_PRODUCT_THEX: - hardware_counters = hardware_counters_mali_tHEx; - count = ARRAY_SIZE(hardware_counters_mali_tHEx); - break; - case GPU_ID2_PRODUCT_TSIX: - hardware_counters = hardware_counters_mali_tSIx; - count = ARRAY_SIZE(hardware_counters_mali_tSIx); - break; - case GPU_ID2_PRODUCT_TDVX: - hardware_counters = hardware_counters_mali_tSIx; - count = ARRAY_SIZE(hardware_counters_mali_tSIx); - break; - case GPU_ID2_PRODUCT_TNOX: - hardware_counters = hardware_counters_mali_tNOx; - count = ARRAY_SIZE(hardware_counters_mali_tNOx); - break; - case GPU_ID2_PRODUCT_TGOX: - hardware_counters = hardware_counters_mali_tGOx; - count = ARRAY_SIZE(hardware_counters_mali_tGOx); - break; - case GPU_ID2_PRODUCT_TKAX: - hardware_counters = hardware_counters_mali_tKAx; - count = ARRAY_SIZE(hardware_counters_mali_tKAx); - break; - case GPU_ID2_PRODUCT_TTRX: - hardware_counters = hardware_counters_mali_tTRx; - count = ARRAY_SIZE(hardware_counters_mali_tTRx); - break; - default: - hardware_counters = NULL; - count = 0; - dev_err(kbdev->dev, "Unrecognized product ID: %u\n", - product_id); - break; - } - } else { - switch (product_id) { - /* If we are using a Mali-T60x device */ - case GPU_ID_PI_T60X: - hardware_counters = hardware_counters_mali_t60x; - count = ARRAY_SIZE(hardware_counters_mali_t60x); - break; - /* If we are using a Mali-T62x device */ - case GPU_ID_PI_T62X: - hardware_counters = hardware_counters_mali_t62x; - count = ARRAY_SIZE(hardware_counters_mali_t62x); - break; - /* If we are using a Mali-T72x device */ - case GPU_ID_PI_T72X: - hardware_counters = hardware_counters_mali_t72x; - count = ARRAY_SIZE(hardware_counters_mali_t72x); - break; - /* If we are using a Mali-T76x device */ - case GPU_ID_PI_T76X: - hardware_counters = hardware_counters_mali_t76x; - count = ARRAY_SIZE(hardware_counters_mali_t76x); - break; - /* If we are using a Mali-T82x device */ - case GPU_ID_PI_T82X: - hardware_counters = hardware_counters_mali_t82x; - count = ARRAY_SIZE(hardware_counters_mali_t82x); - break; - /* If we are using a Mali-T83x device */ 
- case GPU_ID_PI_T83X: - hardware_counters = hardware_counters_mali_t83x; - count = ARRAY_SIZE(hardware_counters_mali_t83x); - break; - /* If we are using a Mali-T86x device */ - case GPU_ID_PI_T86X: - hardware_counters = hardware_counters_mali_t86x; - count = ARRAY_SIZE(hardware_counters_mali_t86x); - break; - /* If we are using a Mali-T88x device */ - case GPU_ID_PI_TFRX: - hardware_counters = hardware_counters_mali_t88x; - count = ARRAY_SIZE(hardware_counters_mali_t88x); - break; - default: - hardware_counters = NULL; - count = 0; - dev_err(kbdev->dev, "Unrecognized product ID: %u\n", - product_id); - break; - } - } - - /* Release the kbdev reference. */ - kbase_release_device(kbdev); - - *total_counters = count; - - /* If we return a string array take a reference on the module (or fail). */ - if (hardware_counters && !try_module_get(THIS_MODULE)) - return NULL; - - return hardware_counters; -} -KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names); - -void kbase_gator_hwcnt_term_names(void) -{ - /* Release the module reference. */ - module_put(THIS_MODULE); -} -KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); - -struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) -{ - int errcode; - struct kbase_gator_hwcnt_handles *hand; - const struct kbase_hwcnt_metadata *metadata; - struct kbase_hwcnt_physical_enable_map phys_map; - uint32_t dump_size = 0, i = 0; - - if (!in_out_info) - return NULL; - - hand = kzalloc(sizeof(*hand), GFP_KERNEL); - if (!hand) - return NULL; - - INIT_WORK(&hand->dump_work, dump_worker); - spin_lock_init(&hand->dump_lock); - - /* Get the first device */ - hand->kbdev = kbase_find_device(-1); - if (!hand->kbdev) - goto free_hand; - - metadata = kbase_hwcnt_virtualizer_metadata( - hand->kbdev->hwcnt_gpu_virt); - if (!metadata) - goto release_device; - - errcode = kbase_hwcnt_enable_map_alloc(metadata, &hand->enable_map); - if (errcode) - goto release_device; - - errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hand->dump_buf); - if (errcode) - goto free_enable_map; - - in_out_info->kernel_dump_buffer = hand->dump_buf.dump_buf; - - in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; - in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; - in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id; - - /* If we are using a v4 device (Mali-T6xx or Mali-T72x) */ - if (kbase_hw_has_feature(hand->kbdev, BASE_HW_FEATURE_V4)) { - uint32_t cg, j; - uint64_t core_mask; - - /* There are 8 hardware counters blocks per core group */ - in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * - MALI_MAX_NUM_BLOCKS_PER_GROUP * - in_out_info->nr_core_groups, GFP_KERNEL); - - if (!in_out_info->hwc_layout) - goto free_dump_buf; - - dump_size = in_out_info->nr_core_groups * - MALI_MAX_NUM_BLOCKS_PER_GROUP * - MALI_COUNTERS_PER_BLOCK * - MALI_BYTES_PER_COUNTER; - - for (cg = 0; cg < in_out_info->nr_core_groups; cg++) { - core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask; - - for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) { - if (core_mask & (1u << j)) - in_out_info->hwc_layout[i++] = SHADER_BLOCK; - else - in_out_info->hwc_layout[i++] = RESERVED_BLOCK; - } - - in_out_info->hwc_layout[i++] = TILER_BLOCK; - in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; - - in_out_info->hwc_layout[i++] = RESERVED_BLOCK; - - if (0 == cg) - in_out_info->hwc_layout[i++] = JM_BLOCK; - else - in_out_info->hwc_layout[i++] = RESERVED_BLOCK; - } - /* If we are using any other device */ - } else { - uint32_t 
nr_l2, nr_sc_bits, j; - uint64_t core_mask; - - nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; - - core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; - - nr_sc_bits = fls64(core_mask); - - /* The job manager and tiler sets of counters - * are always present */ - in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); - - if (!in_out_info->hwc_layout) - goto free_dump_buf; - - dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; - - in_out_info->hwc_layout[i++] = JM_BLOCK; - in_out_info->hwc_layout[i++] = TILER_BLOCK; - - for (j = 0; j < nr_l2; j++) - in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; - - while (core_mask != 0ull) { - if ((core_mask & 1ull) != 0ull) - in_out_info->hwc_layout[i++] = SHADER_BLOCK; - else - in_out_info->hwc_layout[i++] = RESERVED_BLOCK; - core_mask >>= 1; - } - } - - /* Calculated dump size must be the same as real dump size */ - if (WARN_ON(dump_size != metadata->dump_buf_bytes)) - goto free_layout; - - in_out_info->nr_hwc_blocks = i; - in_out_info->size = dump_size; - - phys_map.jm_bm = in_out_info->bitmask[JM_BLOCK]; - phys_map.tiler_bm = in_out_info->bitmask[TILER_BLOCK]; - phys_map.shader_bm = in_out_info->bitmask[SHADER_BLOCK]; - phys_map.mmu_l2_bm = in_out_info->bitmask[MMU_L2_BLOCK]; - kbase_hwcnt_gpu_enable_map_from_physical(&hand->enable_map, &phys_map); - errcode = kbase_hwcnt_virtualizer_client_create( - hand->kbdev->hwcnt_gpu_virt, &hand->enable_map, &hand->hvcli); - if (errcode) { - dev_err(hand->kbdev->dev, - "Failed to register gator with hwcnt virtualizer core"); - goto free_layout; - } - - return hand; - -free_layout: - kfree(in_out_info->hwc_layout); -free_dump_buf: - kbase_hwcnt_dump_buffer_free(&hand->dump_buf); -free_enable_map: - kbase_hwcnt_enable_map_free(&hand->enable_map); -release_device: - kbase_release_device(hand->kbdev); -free_hand: - kfree(hand); - return NULL; -} -KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); - -void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles) -{ - if (in_out_info) - kfree(in_out_info->hwc_layout); - - if (opaque_handles) { - cancel_work_sync(&opaque_handles->dump_work); - kbase_hwcnt_virtualizer_client_destroy(opaque_handles->hvcli); - kbase_hwcnt_dump_buffer_free(&opaque_handles->dump_buf); - kbase_hwcnt_enable_map_free(&opaque_handles->enable_map); - kbase_release_device(opaque_handles->kbdev); - kfree(opaque_handles); - } -} -KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); - -static void dump_worker(struct work_struct *work) -{ - int errcode; - u64 ts_start_ns; - u64 ts_end_ns; - struct kbase_gator_hwcnt_handles *hand; - - hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); - errcode = kbase_hwcnt_virtualizer_client_dump( - hand->hvcli, &ts_start_ns, &ts_end_ns, &hand->dump_buf); - if (!errcode) { - /* Patch the header to hide other client's counter choices */ - kbase_hwcnt_gpu_patch_dump_headers( - &hand->dump_buf, &hand->enable_map); - /* Zero all non-enabled counters (currently undefined values) */ - kbase_hwcnt_dump_buffer_zero_non_enabled( - &hand->dump_buf, &hand->enable_map); - spin_lock_bh(&hand->dump_lock); - hand->dump_complete = 1; - spin_unlock_bh(&hand->dump_lock); - } else { - schedule_work(&hand->dump_work); - } -} - -uint32_t kbase_gator_instr_hwcnt_dump_complete( - struct kbase_gator_hwcnt_handles *opaque_handles, - uint32_t * const success) -{ - - if (opaque_handles && success) { - *success = 
opaque_handles->dump_complete; - opaque_handles->dump_complete = 0; - return *success; - } - return 0; -} -KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); - -uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) -{ - if (opaque_handles) - schedule_work(&opaque_handles->dump_work); - return 0; -} -KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_api.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_api.h deleted file mode 100755 index bd0589ed6c1a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_api.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_GATOR_API_H_ -#define _KBASE_GATOR_API_H_ - -/** - * @brief This file describes the API used by Gator to fetch hardware counters. - */ - -/* This define is used by the gator kernel module compile to select which DDK - * API calling convention to use. If not defined (legacy DDK) gator assumes - * version 1. The version to DDK release mapping is: - * Version 1 API: DDK versions r1px, r2px - * Version 2 API: DDK versions r3px, r4px - * Version 3 API: DDK version r5p0 and newer - * - * API Usage - * ========= - * - * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter - * names for the GPU present in this device. - * - * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for - * the counters you want enabled. The enables can all be set for simplicity in - * most use cases, but disabling some will let you minimize bandwidth impact. - * - * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a - * counter context. On successful return the DDK will have populated the - * structure with a variety of useful information. - * - * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a - * counter dump. If this returns a non-zero value the request has been queued, - * otherwise the driver has been unable to do so (typically because of another - * user of the instrumentation exists concurrently). - * - * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the previously - * requested dump has been succesful. If this returns non-zero the counter dump - * has resolved, but the value of *success must also be tested as the dump - * may have not been successful. If it returns zero the counter dump was - * abandoned due to the device being busy (typically because of another - * user of the instrumentation exists concurrently). - * - * 6] Process the counters stored in the buffer pointed to by ... 
- * - * kbase_gator_hwcnt_info->kernel_dump_buffer - * - * In pseudo code you can find all of the counters via this approach: - * - * - * hwcnt_info # pointer to kbase_gator_hwcnt_info structure - * hwcnt_name # pointer to name list - * - * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer - * - * # Iterate over each 64-counter block in this GPU configuration - * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) { - * hwc_type type = hwcnt_info->hwc_layout[i]; - * - * # Skip reserved type blocks - they contain no counters at all - * if( type == RESERVED_BLOCK ) { - * continue; - * } - * - * size_t name_offset = type * 64; - * size_t data_offset = i * 64; - * - * # Iterate over the names of the counters in this block type - * for( j = 0; j < 64; j++) { - * const char * name = hwcnt_name[name_offset+j]; - * - * # Skip empty name strings - there is no counter here - * if( name[0] == '\0' ) { - * continue; - * } - * - * u32 data = hwcnt_data[data_offset+j]; - * - * printk( "COUNTER: %s DATA: %u\n", name, data ); - * } - * } - * - * - * Note that in most implementations you typically want to either SUM or - * AVERAGE multiple instances of the same counter if, for example, you have - * multiple shader cores or multiple L2 caches. The most sensible view for - * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU - * counters. - * - * 7] Goto 4, repeating until you want to stop collecting counters. - * - * 8] Release the dump resources by calling kbase_gator_hwcnt_term(). - * - * 9] Release the name table resources by calling - * kbase_gator_hwcnt_term_names(). This function must only be called if - * init_names() returned a non-NULL value. - **/ - -#define MALI_DDK_GATOR_API_VERSION 3 - -enum hwc_type { - JM_BLOCK = 0, - TILER_BLOCK, - SHADER_BLOCK, - MMU_L2_BLOCK, - RESERVED_BLOCK -}; - -struct kbase_gator_hwcnt_info { - /* Passed from Gator to kbase */ - - /* the bitmask of enabled hardware counters for each counter block */ - uint16_t bitmask[4]; - - /* Passed from kbase to Gator */ - - /* ptr to counter dump memory */ - void *kernel_dump_buffer; - - /* size of counter dump memory */ - uint32_t size; - - /* the ID of the Mali device */ - uint32_t gpu_id; - - /* the number of shader cores in the GPU */ - uint32_t nr_cores; - - /* the number of core groups */ - uint32_t nr_core_groups; - - /* the memory layout of the performance counters */ - enum hwc_type *hwc_layout; - - /* the total number of hardware couter blocks */ - uint32_t nr_hwc_blocks; -}; - -/** - * @brief Opaque block of Mali data which Gator needs to return to the API later. - */ -struct kbase_gator_hwcnt_handles; - -/** - * @brief Initialize the resources Gator needs for performance profiling. - * - * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali - * specific information that will be returned to Gator. On entry Gator must have populated the - * 'bitmask' field with the counters it wishes to enable for each class of counter block. - * Each entry in the array corresponds to a single counter class based on the "hwc_type" - * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables - * the first 4 counters in the block, and so on). See the GPU counter array as returned by - * kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU. - * - * @return Pointer to an opaque handle block on success, NULL on error. 
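- *
- * A minimal polling sketch following the walkthrough above (steps 3] to
- * 8], single iteration), with a hypothetical process() consumer and no
- * error handling:
- *
- *   struct kbase_gator_hwcnt_info info = {
- *           .bitmask = { 0xffff, 0xffff, 0xffff, 0xffff }
- *   };
- *   struct kbase_gator_hwcnt_handles *h = kbase_gator_hwcnt_init(&info);
- *   uint32_t success;
- *
- *   if (h && kbase_gator_instr_hwcnt_dump_irq(h)) {
- *           while (!kbase_gator_instr_hwcnt_dump_complete(h, &success))
- *                   msleep(1);
- *           if (success)
- *                   process(info.kernel_dump_buffer, info.size);
- *   }
- *   kbase_gator_hwcnt_term(&info, h);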
- */ -extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info); - -/** - * @brief Free all resources once Gator has finished using performance counters. - * - * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the - * Mali specific information that will be returned to Gator. - * @param opaque_handles A wrapper structure for kbase structures. - */ -extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles); - -/** - * @brief Poll whether a counter dump is successful. - * - * @param opaque_handles A wrapper structure for kbase structures. - * @param[out] success Non-zero on success, zero on failure. - * - * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a - * completed dump may not have dumped succesfully, so the caller must test for both - * a completed and successful dump before processing counters. - */ -extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success); - -/** - * @brief Request the generation of a new counter dump. - * - * @param opaque_handles A wrapper structure for kbase structures. - * - * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise. - */ -extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles); - -/** - * @brief This function is used to fetch the names table based on the Mali device in use. - * - * @param[out] total_counters The total number of counters short names in the Mali devices' list. - * - * @return Pointer to an array of strings of length *total_counters. - */ -extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters); - -/** - * @brief This function is used to terminate the use of the names table. - * - * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value. - */ -extern void kbase_gator_hwcnt_term_names(void); - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names.h deleted file mode 100755 index 5d38c7b73553..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names.h +++ /dev/null @@ -1,2178 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_H_ -#define _KBASE_GATOR_HWCNT_NAMES_H_ - -/* - * "Short names" for hardware counters used by Streamline. Counters names are - * stored in accordance with their memory layout in the binary counter block - * emitted by the Mali GPU. 
Each "master" in the GPU emits a fixed-size block - * of 64 counters, and each GPU implements the same set of "masters" although - * the counters each master exposes within its block of 64 may vary. - * - * Counters which are an empty string are simply "holes" in the counter memory - * where no counter exists. - */ - -static const char * const hardware_counters_mali_t60x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T60x_MESSAGES_SENT", - "T60x_MESSAGES_RECEIVED", - "T60x_GPU_ACTIVE", - "T60x_IRQ_ACTIVE", - "T60x_JS0_JOBS", - "T60x_JS0_TASKS", - "T60x_JS0_ACTIVE", - "", - "T60x_JS0_WAIT_READ", - "T60x_JS0_WAIT_ISSUE", - "T60x_JS0_WAIT_DEPEND", - "T60x_JS0_WAIT_FINISH", - "T60x_JS1_JOBS", - "T60x_JS1_TASKS", - "T60x_JS1_ACTIVE", - "", - "T60x_JS1_WAIT_READ", - "T60x_JS1_WAIT_ISSUE", - "T60x_JS1_WAIT_DEPEND", - "T60x_JS1_WAIT_FINISH", - "T60x_JS2_JOBS", - "T60x_JS2_TASKS", - "T60x_JS2_ACTIVE", - "", - "T60x_JS2_WAIT_READ", - "T60x_JS2_WAIT_ISSUE", - "T60x_JS2_WAIT_DEPEND", - "T60x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T60x_TI_JOBS_PROCESSED", - "T60x_TI_TRIANGLES", - "T60x_TI_QUADS", - "T60x_TI_POLYGONS", - "T60x_TI_POINTS", - "T60x_TI_LINES", - "T60x_TI_VCACHE_HIT", - "T60x_TI_VCACHE_MISS", - "T60x_TI_FRONT_FACING", - "T60x_TI_BACK_FACING", - "T60x_TI_PRIM_VISIBLE", - "T60x_TI_PRIM_CULLED", - "T60x_TI_PRIM_CLIPPED", - "T60x_TI_LEVEL0", - "T60x_TI_LEVEL1", - "T60x_TI_LEVEL2", - "T60x_TI_LEVEL3", - "T60x_TI_LEVEL4", - "T60x_TI_LEVEL5", - "T60x_TI_LEVEL6", - "T60x_TI_LEVEL7", - "T60x_TI_COMMAND_1", - "T60x_TI_COMMAND_2", - "T60x_TI_COMMAND_3", - "T60x_TI_COMMAND_4", - "T60x_TI_COMMAND_4_7", - "T60x_TI_COMMAND_8_15", - "T60x_TI_COMMAND_16_63", - "T60x_TI_COMMAND_64", - "T60x_TI_COMPRESS_IN", - "T60x_TI_COMPRESS_OUT", - "T60x_TI_COMPRESS_FLUSH", - "T60x_TI_TIMESTAMPS", - "T60x_TI_PCACHE_HIT", - "T60x_TI_PCACHE_MISS", - "T60x_TI_PCACHE_LINE", - "T60x_TI_PCACHE_STALL", - "T60x_TI_WRBUF_HIT", - "T60x_TI_WRBUF_MISS", - "T60x_TI_WRBUF_LINE", - "T60x_TI_WRBUF_PARTIAL", - "T60x_TI_WRBUF_STALL", - "T60x_TI_ACTIVE", - "T60x_TI_LOADING_DESC", - "T60x_TI_INDEX_WAIT", - "T60x_TI_INDEX_RANGE_WAIT", - "T60x_TI_VERTEX_WAIT", - "T60x_TI_PCACHE_WAIT", - "T60x_TI_WRBUF_WAIT", - "T60x_TI_BUS_READ", - "T60x_TI_BUS_WRITE", - "", - "", - "", - "", - "", - "T60x_TI_UTLB_STALL", - "T60x_TI_UTLB_REPLAY_MISS", - "T60x_TI_UTLB_REPLAY_FULL", - "T60x_TI_UTLB_NEW_MISS", - "T60x_TI_UTLB_HIT", - - /* Shader Core */ - "", - "", - "", - "", - "T60x_FRAG_ACTIVE", - "T60x_FRAG_PRIMITIVES", - "T60x_FRAG_PRIMITIVES_DROPPED", - "T60x_FRAG_CYCLES_DESC", - "T60x_FRAG_CYCLES_PLR", - "T60x_FRAG_CYCLES_VERT", - "T60x_FRAG_CYCLES_TRISETUP", - "T60x_FRAG_CYCLES_RAST", - "T60x_FRAG_THREADS", - "T60x_FRAG_DUMMY_THREADS", - "T60x_FRAG_QUADS_RAST", - "T60x_FRAG_QUADS_EZS_TEST", - "T60x_FRAG_QUADS_EZS_KILLED", - "T60x_FRAG_THREADS_LZS_TEST", - "T60x_FRAG_THREADS_LZS_KILLED", - "T60x_FRAG_CYCLES_NO_TILE", - "T60x_FRAG_NUM_TILES", - "T60x_FRAG_TRANS_ELIM", - "T60x_COMPUTE_ACTIVE", - "T60x_COMPUTE_TASKS", - "T60x_COMPUTE_THREADS", - "T60x_COMPUTE_CYCLES_DESC", - "T60x_TRIPIPE_ACTIVE", - "T60x_ARITH_WORDS", - "T60x_ARITH_CYCLES_REG", - "T60x_ARITH_CYCLES_L0", - "T60x_ARITH_FRAG_DEPEND", - "T60x_LS_WORDS", - "T60x_LS_ISSUES", - "T60x_LS_RESTARTS", - "T60x_LS_REISSUES_MISS", - "T60x_LS_REISSUES_VD", - "T60x_LS_REISSUE_ATTRIB_MISS", - 
"T60x_LS_NO_WB", - "T60x_TEX_WORDS", - "T60x_TEX_BUBBLES", - "T60x_TEX_WORDS_L0", - "T60x_TEX_WORDS_DESC", - "T60x_TEX_ISSUES", - "T60x_TEX_RECIRC_FMISS", - "T60x_TEX_RECIRC_DESC", - "T60x_TEX_RECIRC_MULTI", - "T60x_TEX_RECIRC_PMISS", - "T60x_TEX_RECIRC_CONF", - "T60x_LSC_READ_HITS", - "T60x_LSC_READ_MISSES", - "T60x_LSC_WRITE_HITS", - "T60x_LSC_WRITE_MISSES", - "T60x_LSC_ATOMIC_HITS", - "T60x_LSC_ATOMIC_MISSES", - "T60x_LSC_LINE_FETCHES", - "T60x_LSC_DIRTY_LINE", - "T60x_LSC_SNOOPS", - "T60x_AXI_TLB_STALL", - "T60x_AXI_TLB_MISS", - "T60x_AXI_TLB_TRANSACTION", - "T60x_LS_TLB_MISS", - "T60x_LS_TLB_HIT", - "T60x_AXI_BEATS_READ", - "T60x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T60x_MMU_HIT", - "T60x_MMU_NEW_MISS", - "T60x_MMU_REPLAY_FULL", - "T60x_MMU_REPLAY_MISS", - "T60x_MMU_TABLE_WALK", - "", - "", - "", - "", - "", - "", - "", - "T60x_UTLB_HIT", - "T60x_UTLB_NEW_MISS", - "T60x_UTLB_REPLAY_FULL", - "T60x_UTLB_REPLAY_MISS", - "T60x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T60x_L2_EXT_WRITE_BEATS", - "T60x_L2_EXT_READ_BEATS", - "T60x_L2_ANY_LOOKUP", - "T60x_L2_READ_LOOKUP", - "T60x_L2_SREAD_LOOKUP", - "T60x_L2_READ_REPLAY", - "T60x_L2_READ_SNOOP", - "T60x_L2_READ_HIT", - "T60x_L2_CLEAN_MISS", - "T60x_L2_WRITE_LOOKUP", - "T60x_L2_SWRITE_LOOKUP", - "T60x_L2_WRITE_REPLAY", - "T60x_L2_WRITE_SNOOP", - "T60x_L2_WRITE_HIT", - "T60x_L2_EXT_READ_FULL", - "T60x_L2_EXT_READ_HALF", - "T60x_L2_EXT_WRITE_FULL", - "T60x_L2_EXT_WRITE_HALF", - "T60x_L2_EXT_READ", - "T60x_L2_EXT_READ_LINE", - "T60x_L2_EXT_WRITE", - "T60x_L2_EXT_WRITE_LINE", - "T60x_L2_EXT_WRITE_SMALL", - "T60x_L2_EXT_BARRIER", - "T60x_L2_EXT_AR_STALL", - "T60x_L2_EXT_R_BUF_FULL", - "T60x_L2_EXT_RD_BUF_FULL", - "T60x_L2_EXT_R_RAW", - "T60x_L2_EXT_W_STALL", - "T60x_L2_EXT_W_BUF_FULL", - "T60x_L2_EXT_R_W_HAZARD", - "T60x_L2_TAG_HAZARD", - "T60x_L2_SNOOP_FULL", - "T60x_L2_REPLAY_FULL" -}; -static const char * const hardware_counters_mali_t62x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T62x_MESSAGES_SENT", - "T62x_MESSAGES_RECEIVED", - "T62x_GPU_ACTIVE", - "T62x_IRQ_ACTIVE", - "T62x_JS0_JOBS", - "T62x_JS0_TASKS", - "T62x_JS0_ACTIVE", - "", - "T62x_JS0_WAIT_READ", - "T62x_JS0_WAIT_ISSUE", - "T62x_JS0_WAIT_DEPEND", - "T62x_JS0_WAIT_FINISH", - "T62x_JS1_JOBS", - "T62x_JS1_TASKS", - "T62x_JS1_ACTIVE", - "", - "T62x_JS1_WAIT_READ", - "T62x_JS1_WAIT_ISSUE", - "T62x_JS1_WAIT_DEPEND", - "T62x_JS1_WAIT_FINISH", - "T62x_JS2_JOBS", - "T62x_JS2_TASKS", - "T62x_JS2_ACTIVE", - "", - "T62x_JS2_WAIT_READ", - "T62x_JS2_WAIT_ISSUE", - "T62x_JS2_WAIT_DEPEND", - "T62x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T62x_TI_JOBS_PROCESSED", - "T62x_TI_TRIANGLES", - "T62x_TI_QUADS", - "T62x_TI_POLYGONS", - "T62x_TI_POINTS", - "T62x_TI_LINES", - "T62x_TI_VCACHE_HIT", - "T62x_TI_VCACHE_MISS", - "T62x_TI_FRONT_FACING", - "T62x_TI_BACK_FACING", - "T62x_TI_PRIM_VISIBLE", - "T62x_TI_PRIM_CULLED", - "T62x_TI_PRIM_CLIPPED", - "T62x_TI_LEVEL0", - "T62x_TI_LEVEL1", - "T62x_TI_LEVEL2", - "T62x_TI_LEVEL3", - "T62x_TI_LEVEL4", - "T62x_TI_LEVEL5", - "T62x_TI_LEVEL6", - "T62x_TI_LEVEL7", - "T62x_TI_COMMAND_1", - "T62x_TI_COMMAND_2", - "T62x_TI_COMMAND_3", - "T62x_TI_COMMAND_4", - "T62x_TI_COMMAND_5_7", - "T62x_TI_COMMAND_8_15", - "T62x_TI_COMMAND_16_63", - "T62x_TI_COMMAND_64", - "T62x_TI_COMPRESS_IN", - 
"T62x_TI_COMPRESS_OUT", - "T62x_TI_COMPRESS_FLUSH", - "T62x_TI_TIMESTAMPS", - "T62x_TI_PCACHE_HIT", - "T62x_TI_PCACHE_MISS", - "T62x_TI_PCACHE_LINE", - "T62x_TI_PCACHE_STALL", - "T62x_TI_WRBUF_HIT", - "T62x_TI_WRBUF_MISS", - "T62x_TI_WRBUF_LINE", - "T62x_TI_WRBUF_PARTIAL", - "T62x_TI_WRBUF_STALL", - "T62x_TI_ACTIVE", - "T62x_TI_LOADING_DESC", - "T62x_TI_INDEX_WAIT", - "T62x_TI_INDEX_RANGE_WAIT", - "T62x_TI_VERTEX_WAIT", - "T62x_TI_PCACHE_WAIT", - "T62x_TI_WRBUF_WAIT", - "T62x_TI_BUS_READ", - "T62x_TI_BUS_WRITE", - "", - "", - "", - "", - "", - "T62x_TI_UTLB_STALL", - "T62x_TI_UTLB_REPLAY_MISS", - "T62x_TI_UTLB_REPLAY_FULL", - "T62x_TI_UTLB_NEW_MISS", - "T62x_TI_UTLB_HIT", - - /* Shader Core */ - "", - "", - "", - "T62x_SHADER_CORE_ACTIVE", - "T62x_FRAG_ACTIVE", - "T62x_FRAG_PRIMITIVES", - "T62x_FRAG_PRIMITIVES_DROPPED", - "T62x_FRAG_CYCLES_DESC", - "T62x_FRAG_CYCLES_FPKQ_ACTIVE", - "T62x_FRAG_CYCLES_VERT", - "T62x_FRAG_CYCLES_TRISETUP", - "T62x_FRAG_CYCLES_EZS_ACTIVE", - "T62x_FRAG_THREADS", - "T62x_FRAG_DUMMY_THREADS", - "T62x_FRAG_QUADS_RAST", - "T62x_FRAG_QUADS_EZS_TEST", - "T62x_FRAG_QUADS_EZS_KILLED", - "T62x_FRAG_THREADS_LZS_TEST", - "T62x_FRAG_THREADS_LZS_KILLED", - "T62x_FRAG_CYCLES_NO_TILE", - "T62x_FRAG_NUM_TILES", - "T62x_FRAG_TRANS_ELIM", - "T62x_COMPUTE_ACTIVE", - "T62x_COMPUTE_TASKS", - "T62x_COMPUTE_THREADS", - "T62x_COMPUTE_CYCLES_DESC", - "T62x_TRIPIPE_ACTIVE", - "T62x_ARITH_WORDS", - "T62x_ARITH_CYCLES_REG", - "T62x_ARITH_CYCLES_L0", - "T62x_ARITH_FRAG_DEPEND", - "T62x_LS_WORDS", - "T62x_LS_ISSUES", - "T62x_LS_RESTARTS", - "T62x_LS_REISSUES_MISS", - "T62x_LS_REISSUES_VD", - "T62x_LS_REISSUE_ATTRIB_MISS", - "T62x_LS_NO_WB", - "T62x_TEX_WORDS", - "T62x_TEX_BUBBLES", - "T62x_TEX_WORDS_L0", - "T62x_TEX_WORDS_DESC", - "T62x_TEX_ISSUES", - "T62x_TEX_RECIRC_FMISS", - "T62x_TEX_RECIRC_DESC", - "T62x_TEX_RECIRC_MULTI", - "T62x_TEX_RECIRC_PMISS", - "T62x_TEX_RECIRC_CONF", - "T62x_LSC_READ_HITS", - "T62x_LSC_READ_MISSES", - "T62x_LSC_WRITE_HITS", - "T62x_LSC_WRITE_MISSES", - "T62x_LSC_ATOMIC_HITS", - "T62x_LSC_ATOMIC_MISSES", - "T62x_LSC_LINE_FETCHES", - "T62x_LSC_DIRTY_LINE", - "T62x_LSC_SNOOPS", - "T62x_AXI_TLB_STALL", - "T62x_AXI_TLB_MISS", - "T62x_AXI_TLB_TRANSACTION", - "T62x_LS_TLB_MISS", - "T62x_LS_TLB_HIT", - "T62x_AXI_BEATS_READ", - "T62x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T62x_MMU_HIT", - "T62x_MMU_NEW_MISS", - "T62x_MMU_REPLAY_FULL", - "T62x_MMU_REPLAY_MISS", - "T62x_MMU_TABLE_WALK", - "", - "", - "", - "", - "", - "", - "", - "T62x_UTLB_HIT", - "T62x_UTLB_NEW_MISS", - "T62x_UTLB_REPLAY_FULL", - "T62x_UTLB_REPLAY_MISS", - "T62x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T62x_L2_EXT_WRITE_BEATS", - "T62x_L2_EXT_READ_BEATS", - "T62x_L2_ANY_LOOKUP", - "T62x_L2_READ_LOOKUP", - "T62x_L2_SREAD_LOOKUP", - "T62x_L2_READ_REPLAY", - "T62x_L2_READ_SNOOP", - "T62x_L2_READ_HIT", - "T62x_L2_CLEAN_MISS", - "T62x_L2_WRITE_LOOKUP", - "T62x_L2_SWRITE_LOOKUP", - "T62x_L2_WRITE_REPLAY", - "T62x_L2_WRITE_SNOOP", - "T62x_L2_WRITE_HIT", - "T62x_L2_EXT_READ_FULL", - "T62x_L2_EXT_READ_HALF", - "T62x_L2_EXT_WRITE_FULL", - "T62x_L2_EXT_WRITE_HALF", - "T62x_L2_EXT_READ", - "T62x_L2_EXT_READ_LINE", - "T62x_L2_EXT_WRITE", - "T62x_L2_EXT_WRITE_LINE", - "T62x_L2_EXT_WRITE_SMALL", - "T62x_L2_EXT_BARRIER", - "T62x_L2_EXT_AR_STALL", - "T62x_L2_EXT_R_BUF_FULL", - "T62x_L2_EXT_RD_BUF_FULL", - "T62x_L2_EXT_R_RAW", - "T62x_L2_EXT_W_STALL", - "T62x_L2_EXT_W_BUF_FULL", - "T62x_L2_EXT_R_W_HAZARD", - "T62x_L2_TAG_HAZARD", - "T62x_L2_SNOOP_FULL", - 
"T62x_L2_REPLAY_FULL" -}; - -static const char * const hardware_counters_mali_t72x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T72x_GPU_ACTIVE", - "T72x_IRQ_ACTIVE", - "T72x_JS0_JOBS", - "T72x_JS0_TASKS", - "T72x_JS0_ACTIVE", - "T72x_JS1_JOBS", - "T72x_JS1_TASKS", - "T72x_JS1_ACTIVE", - "T72x_JS2_JOBS", - "T72x_JS2_TASKS", - "T72x_JS2_ACTIVE", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T72x_TI_JOBS_PROCESSED", - "T72x_TI_TRIANGLES", - "T72x_TI_QUADS", - "T72x_TI_POLYGONS", - "T72x_TI_POINTS", - "T72x_TI_LINES", - "T72x_TI_FRONT_FACING", - "T72x_TI_BACK_FACING", - "T72x_TI_PRIM_VISIBLE", - "T72x_TI_PRIM_CULLED", - "T72x_TI_PRIM_CLIPPED", - "", - "", - "", - "", - "", - "", - "", - "", - "T72x_TI_ACTIVE", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Shader Core */ - "", - "", - "", - "", - "T72x_FRAG_ACTIVE", - "T72x_FRAG_PRIMITIVES", - "T72x_FRAG_PRIMITIVES_DROPPED", - "T72x_FRAG_THREADS", - "T72x_FRAG_DUMMY_THREADS", - "T72x_FRAG_QUADS_RAST", - "T72x_FRAG_QUADS_EZS_TEST", - "T72x_FRAG_QUADS_EZS_KILLED", - "T72x_FRAG_THREADS_LZS_TEST", - "T72x_FRAG_THREADS_LZS_KILLED", - "T72x_FRAG_CYCLES_NO_TILE", - "T72x_FRAG_NUM_TILES", - "T72x_FRAG_TRANS_ELIM", - "T72x_COMPUTE_ACTIVE", - "T72x_COMPUTE_TASKS", - "T72x_COMPUTE_THREADS", - "T72x_TRIPIPE_ACTIVE", - "T72x_ARITH_WORDS", - "T72x_ARITH_CYCLES_REG", - "T72x_LS_WORDS", - "T72x_LS_ISSUES", - "T72x_LS_RESTARTS", - "T72x_LS_REISSUES_MISS", - "T72x_TEX_WORDS", - "T72x_TEX_BUBBLES", - "T72x_TEX_ISSUES", - "T72x_LSC_READ_HITS", - "T72x_LSC_READ_MISSES", - "T72x_LSC_WRITE_HITS", - "T72x_LSC_WRITE_MISSES", - "T72x_LSC_ATOMIC_HITS", - "T72x_LSC_ATOMIC_MISSES", - "T72x_LSC_LINE_FETCHES", - "T72x_LSC_DIRTY_LINE", - "T72x_LSC_SNOOPS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*L2 and MMU */ - "", - "", - "", - "", - "T72x_L2_EXT_WRITE_BEAT", - "T72x_L2_EXT_READ_BEAT", - "T72x_L2_READ_SNOOP", - "T72x_L2_READ_HIT", - "T72x_L2_WRITE_SNOOP", - "T72x_L2_WRITE_HIT", - "T72x_L2_EXT_WRITE_SMALL", - "T72x_L2_EXT_BARRIER", - "T72x_L2_EXT_AR_STALL", - "T72x_L2_EXT_W_STALL", - "T72x_L2_SNOOP_FULL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "" -}; - -static const char * const hardware_counters_mali_t76x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T76x_MESSAGES_SENT", - "T76x_MESSAGES_RECEIVED", - "T76x_GPU_ACTIVE", - "T76x_IRQ_ACTIVE", - "T76x_JS0_JOBS", - "T76x_JS0_TASKS", - "T76x_JS0_ACTIVE", - "", - "T76x_JS0_WAIT_READ", - "T76x_JS0_WAIT_ISSUE", - "T76x_JS0_WAIT_DEPEND", - "T76x_JS0_WAIT_FINISH", - "T76x_JS1_JOBS", - "T76x_JS1_TASKS", - "T76x_JS1_ACTIVE", - "", - "T76x_JS1_WAIT_READ", - "T76x_JS1_WAIT_ISSUE", - "T76x_JS1_WAIT_DEPEND", - "T76x_JS1_WAIT_FINISH", - "T76x_JS2_JOBS", - "T76x_JS2_TASKS", - 
"T76x_JS2_ACTIVE", - "", - "T76x_JS2_WAIT_READ", - "T76x_JS2_WAIT_ISSUE", - "T76x_JS2_WAIT_DEPEND", - "T76x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T76x_TI_JOBS_PROCESSED", - "T76x_TI_TRIANGLES", - "T76x_TI_QUADS", - "T76x_TI_POLYGONS", - "T76x_TI_POINTS", - "T76x_TI_LINES", - "T76x_TI_VCACHE_HIT", - "T76x_TI_VCACHE_MISS", - "T76x_TI_FRONT_FACING", - "T76x_TI_BACK_FACING", - "T76x_TI_PRIM_VISIBLE", - "T76x_TI_PRIM_CULLED", - "T76x_TI_PRIM_CLIPPED", - "T76x_TI_LEVEL0", - "T76x_TI_LEVEL1", - "T76x_TI_LEVEL2", - "T76x_TI_LEVEL3", - "T76x_TI_LEVEL4", - "T76x_TI_LEVEL5", - "T76x_TI_LEVEL6", - "T76x_TI_LEVEL7", - "T76x_TI_COMMAND_1", - "T76x_TI_COMMAND_2", - "T76x_TI_COMMAND_3", - "T76x_TI_COMMAND_4", - "T76x_TI_COMMAND_5_7", - "T76x_TI_COMMAND_8_15", - "T76x_TI_COMMAND_16_63", - "T76x_TI_COMMAND_64", - "T76x_TI_COMPRESS_IN", - "T76x_TI_COMPRESS_OUT", - "T76x_TI_COMPRESS_FLUSH", - "T76x_TI_TIMESTAMPS", - "T76x_TI_PCACHE_HIT", - "T76x_TI_PCACHE_MISS", - "T76x_TI_PCACHE_LINE", - "T76x_TI_PCACHE_STALL", - "T76x_TI_WRBUF_HIT", - "T76x_TI_WRBUF_MISS", - "T76x_TI_WRBUF_LINE", - "T76x_TI_WRBUF_PARTIAL", - "T76x_TI_WRBUF_STALL", - "T76x_TI_ACTIVE", - "T76x_TI_LOADING_DESC", - "T76x_TI_INDEX_WAIT", - "T76x_TI_INDEX_RANGE_WAIT", - "T76x_TI_VERTEX_WAIT", - "T76x_TI_PCACHE_WAIT", - "T76x_TI_WRBUF_WAIT", - "T76x_TI_BUS_READ", - "T76x_TI_BUS_WRITE", - "", - "", - "", - "", - "", - "T76x_TI_UTLB_HIT", - "T76x_TI_UTLB_NEW_MISS", - "T76x_TI_UTLB_REPLAY_FULL", - "T76x_TI_UTLB_REPLAY_MISS", - "T76x_TI_UTLB_STALL", - - /* Shader Core */ - "", - "", - "", - "", - "T76x_FRAG_ACTIVE", - "T76x_FRAG_PRIMITIVES", - "T76x_FRAG_PRIMITIVES_DROPPED", - "T76x_FRAG_CYCLES_DESC", - "T76x_FRAG_CYCLES_FPKQ_ACTIVE", - "T76x_FRAG_CYCLES_VERT", - "T76x_FRAG_CYCLES_TRISETUP", - "T76x_FRAG_CYCLES_EZS_ACTIVE", - "T76x_FRAG_THREADS", - "T76x_FRAG_DUMMY_THREADS", - "T76x_FRAG_QUADS_RAST", - "T76x_FRAG_QUADS_EZS_TEST", - "T76x_FRAG_QUADS_EZS_KILLED", - "T76x_FRAG_THREADS_LZS_TEST", - "T76x_FRAG_THREADS_LZS_KILLED", - "T76x_FRAG_CYCLES_NO_TILE", - "T76x_FRAG_NUM_TILES", - "T76x_FRAG_TRANS_ELIM", - "T76x_COMPUTE_ACTIVE", - "T76x_COMPUTE_TASKS", - "T76x_COMPUTE_THREADS", - "T76x_COMPUTE_CYCLES_DESC", - "T76x_TRIPIPE_ACTIVE", - "T76x_ARITH_WORDS", - "T76x_ARITH_CYCLES_REG", - "T76x_ARITH_CYCLES_L0", - "T76x_ARITH_FRAG_DEPEND", - "T76x_LS_WORDS", - "T76x_LS_ISSUES", - "T76x_LS_REISSUE_ATTR", - "T76x_LS_REISSUES_VARY", - "T76x_LS_VARY_RV_MISS", - "T76x_LS_VARY_RV_HIT", - "T76x_LS_NO_UNPARK", - "T76x_TEX_WORDS", - "T76x_TEX_BUBBLES", - "T76x_TEX_WORDS_L0", - "T76x_TEX_WORDS_DESC", - "T76x_TEX_ISSUES", - "T76x_TEX_RECIRC_FMISS", - "T76x_TEX_RECIRC_DESC", - "T76x_TEX_RECIRC_MULTI", - "T76x_TEX_RECIRC_PMISS", - "T76x_TEX_RECIRC_CONF", - "T76x_LSC_READ_HITS", - "T76x_LSC_READ_OP", - "T76x_LSC_WRITE_HITS", - "T76x_LSC_WRITE_OP", - "T76x_LSC_ATOMIC_HITS", - "T76x_LSC_ATOMIC_OP", - "T76x_LSC_LINE_FETCHES", - "T76x_LSC_DIRTY_LINE", - "T76x_LSC_SNOOPS", - "T76x_AXI_TLB_STALL", - "T76x_AXI_TLB_MISS", - "T76x_AXI_TLB_TRANSACTION", - "T76x_LS_TLB_MISS", - "T76x_LS_TLB_HIT", - "T76x_AXI_BEATS_READ", - "T76x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T76x_MMU_HIT", - "T76x_MMU_NEW_MISS", - "T76x_MMU_REPLAY_FULL", - "T76x_MMU_REPLAY_MISS", - "T76x_MMU_TABLE_WALK", - "T76x_MMU_REQUESTS", - "", - "", - "T76x_UTLB_HIT", - 
"T76x_UTLB_NEW_MISS", - "T76x_UTLB_REPLAY_FULL", - "T76x_UTLB_REPLAY_MISS", - "T76x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T76x_L2_EXT_WRITE_BEATS", - "T76x_L2_EXT_READ_BEATS", - "T76x_L2_ANY_LOOKUP", - "T76x_L2_READ_LOOKUP", - "T76x_L2_SREAD_LOOKUP", - "T76x_L2_READ_REPLAY", - "T76x_L2_READ_SNOOP", - "T76x_L2_READ_HIT", - "T76x_L2_CLEAN_MISS", - "T76x_L2_WRITE_LOOKUP", - "T76x_L2_SWRITE_LOOKUP", - "T76x_L2_WRITE_REPLAY", - "T76x_L2_WRITE_SNOOP", - "T76x_L2_WRITE_HIT", - "T76x_L2_EXT_READ_FULL", - "", - "T76x_L2_EXT_WRITE_FULL", - "T76x_L2_EXT_R_W_HAZARD", - "T76x_L2_EXT_READ", - "T76x_L2_EXT_READ_LINE", - "T76x_L2_EXT_WRITE", - "T76x_L2_EXT_WRITE_LINE", - "T76x_L2_EXT_WRITE_SMALL", - "T76x_L2_EXT_BARRIER", - "T76x_L2_EXT_AR_STALL", - "T76x_L2_EXT_R_BUF_FULL", - "T76x_L2_EXT_RD_BUF_FULL", - "T76x_L2_EXT_R_RAW", - "T76x_L2_EXT_W_STALL", - "T76x_L2_EXT_W_BUF_FULL", - "T76x_L2_EXT_R_BUF_FULL", - "T76x_L2_TAG_HAZARD", - "T76x_L2_SNOOP_FULL", - "T76x_L2_REPLAY_FULL" -}; - -static const char * const hardware_counters_mali_t82x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T82x_MESSAGES_SENT", - "T82x_MESSAGES_RECEIVED", - "T82x_GPU_ACTIVE", - "T82x_IRQ_ACTIVE", - "T82x_JS0_JOBS", - "T82x_JS0_TASKS", - "T82x_JS0_ACTIVE", - "", - "T82x_JS0_WAIT_READ", - "T82x_JS0_WAIT_ISSUE", - "T82x_JS0_WAIT_DEPEND", - "T82x_JS0_WAIT_FINISH", - "T82x_JS1_JOBS", - "T82x_JS1_TASKS", - "T82x_JS1_ACTIVE", - "", - "T82x_JS1_WAIT_READ", - "T82x_JS1_WAIT_ISSUE", - "T82x_JS1_WAIT_DEPEND", - "T82x_JS1_WAIT_FINISH", - "T82x_JS2_JOBS", - "T82x_JS2_TASKS", - "T82x_JS2_ACTIVE", - "", - "T82x_JS2_WAIT_READ", - "T82x_JS2_WAIT_ISSUE", - "T82x_JS2_WAIT_DEPEND", - "T82x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T82x_TI_JOBS_PROCESSED", - "T82x_TI_TRIANGLES", - "T82x_TI_QUADS", - "T82x_TI_POLYGONS", - "T82x_TI_POINTS", - "T82x_TI_LINES", - "T82x_TI_FRONT_FACING", - "T82x_TI_BACK_FACING", - "T82x_TI_PRIM_VISIBLE", - "T82x_TI_PRIM_CULLED", - "T82x_TI_PRIM_CLIPPED", - "", - "", - "", - "", - "", - "", - "", - "", - "T82x_TI_ACTIVE", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Shader Core */ - "", - "", - "", - "", - "T82x_FRAG_ACTIVE", - "T82x_FRAG_PRIMITIVES", - "T82x_FRAG_PRIMITIVES_DROPPED", - "T82x_FRAG_CYCLES_DESC", - "T82x_FRAG_CYCLES_FPKQ_ACTIVE", - "T82x_FRAG_CYCLES_VERT", - "T82x_FRAG_CYCLES_TRISETUP", - "T82x_FRAG_CYCLES_EZS_ACTIVE", - "T82x_FRAG_THREADS", - "T82x_FRAG_DUMMY_THREADS", - "T82x_FRAG_QUADS_RAST", - "T82x_FRAG_QUADS_EZS_TEST", - "T82x_FRAG_QUADS_EZS_KILLED", - "T82x_FRAG_THREADS_LZS_TEST", - "T82x_FRAG_THREADS_LZS_KILLED", - "T82x_FRAG_CYCLES_NO_TILE", - "T82x_FRAG_NUM_TILES", - "T82x_FRAG_TRANS_ELIM", - "T82x_COMPUTE_ACTIVE", - "T82x_COMPUTE_TASKS", - "T82x_COMPUTE_THREADS", - "T82x_COMPUTE_CYCLES_DESC", - "T82x_TRIPIPE_ACTIVE", - "T82x_ARITH_WORDS", - "T82x_ARITH_CYCLES_REG", - "T82x_ARITH_CYCLES_L0", - "T82x_ARITH_FRAG_DEPEND", - "T82x_LS_WORDS", - "T82x_LS_ISSUES", - "T82x_LS_REISSUE_ATTR", - "T82x_LS_REISSUES_VARY", - "T82x_LS_VARY_RV_MISS", - "T82x_LS_VARY_RV_HIT", - "T82x_LS_NO_UNPARK", - "T82x_TEX_WORDS", - "T82x_TEX_BUBBLES", - 
"T82x_TEX_WORDS_L0", - "T82x_TEX_WORDS_DESC", - "T82x_TEX_ISSUES", - "T82x_TEX_RECIRC_FMISS", - "T82x_TEX_RECIRC_DESC", - "T82x_TEX_RECIRC_MULTI", - "T82x_TEX_RECIRC_PMISS", - "T82x_TEX_RECIRC_CONF", - "T82x_LSC_READ_HITS", - "T82x_LSC_READ_OP", - "T82x_LSC_WRITE_HITS", - "T82x_LSC_WRITE_OP", - "T82x_LSC_ATOMIC_HITS", - "T82x_LSC_ATOMIC_OP", - "T82x_LSC_LINE_FETCHES", - "T82x_LSC_DIRTY_LINE", - "T82x_LSC_SNOOPS", - "T82x_AXI_TLB_STALL", - "T82x_AXI_TLB_MISS", - "T82x_AXI_TLB_TRANSACTION", - "T82x_LS_TLB_MISS", - "T82x_LS_TLB_HIT", - "T82x_AXI_BEATS_READ", - "T82x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T82x_MMU_HIT", - "T82x_MMU_NEW_MISS", - "T82x_MMU_REPLAY_FULL", - "T82x_MMU_REPLAY_MISS", - "T82x_MMU_TABLE_WALK", - "T82x_MMU_REQUESTS", - "", - "", - "T82x_UTLB_HIT", - "T82x_UTLB_NEW_MISS", - "T82x_UTLB_REPLAY_FULL", - "T82x_UTLB_REPLAY_MISS", - "T82x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T82x_L2_EXT_WRITE_BEATS", - "T82x_L2_EXT_READ_BEATS", - "T82x_L2_ANY_LOOKUP", - "T82x_L2_READ_LOOKUP", - "T82x_L2_SREAD_LOOKUP", - "T82x_L2_READ_REPLAY", - "T82x_L2_READ_SNOOP", - "T82x_L2_READ_HIT", - "T82x_L2_CLEAN_MISS", - "T82x_L2_WRITE_LOOKUP", - "T82x_L2_SWRITE_LOOKUP", - "T82x_L2_WRITE_REPLAY", - "T82x_L2_WRITE_SNOOP", - "T82x_L2_WRITE_HIT", - "T82x_L2_EXT_READ_FULL", - "", - "T82x_L2_EXT_WRITE_FULL", - "T82x_L2_EXT_R_W_HAZARD", - "T82x_L2_EXT_READ", - "T82x_L2_EXT_READ_LINE", - "T82x_L2_EXT_WRITE", - "T82x_L2_EXT_WRITE_LINE", - "T82x_L2_EXT_WRITE_SMALL", - "T82x_L2_EXT_BARRIER", - "T82x_L2_EXT_AR_STALL", - "T82x_L2_EXT_R_BUF_FULL", - "T82x_L2_EXT_RD_BUF_FULL", - "T82x_L2_EXT_R_RAW", - "T82x_L2_EXT_W_STALL", - "T82x_L2_EXT_W_BUF_FULL", - "T82x_L2_EXT_R_BUF_FULL", - "T82x_L2_TAG_HAZARD", - "T82x_L2_SNOOP_FULL", - "T82x_L2_REPLAY_FULL" -}; - -static const char * const hardware_counters_mali_t83x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T83x_MESSAGES_SENT", - "T83x_MESSAGES_RECEIVED", - "T83x_GPU_ACTIVE", - "T83x_IRQ_ACTIVE", - "T83x_JS0_JOBS", - "T83x_JS0_TASKS", - "T83x_JS0_ACTIVE", - "", - "T83x_JS0_WAIT_READ", - "T83x_JS0_WAIT_ISSUE", - "T83x_JS0_WAIT_DEPEND", - "T83x_JS0_WAIT_FINISH", - "T83x_JS1_JOBS", - "T83x_JS1_TASKS", - "T83x_JS1_ACTIVE", - "", - "T83x_JS1_WAIT_READ", - "T83x_JS1_WAIT_ISSUE", - "T83x_JS1_WAIT_DEPEND", - "T83x_JS1_WAIT_FINISH", - "T83x_JS2_JOBS", - "T83x_JS2_TASKS", - "T83x_JS2_ACTIVE", - "", - "T83x_JS2_WAIT_READ", - "T83x_JS2_WAIT_ISSUE", - "T83x_JS2_WAIT_DEPEND", - "T83x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T83x_TI_JOBS_PROCESSED", - "T83x_TI_TRIANGLES", - "T83x_TI_QUADS", - "T83x_TI_POLYGONS", - "T83x_TI_POINTS", - "T83x_TI_LINES", - "T83x_TI_FRONT_FACING", - "T83x_TI_BACK_FACING", - "T83x_TI_PRIM_VISIBLE", - "T83x_TI_PRIM_CULLED", - "T83x_TI_PRIM_CLIPPED", - "", - "", - "", - "", - "", - "", - "", - "", - "T83x_TI_ACTIVE", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Shader Core */ - "", - "", - "", - "", - "T83x_FRAG_ACTIVE", - "T83x_FRAG_PRIMITIVES", - "T83x_FRAG_PRIMITIVES_DROPPED", - "T83x_FRAG_CYCLES_DESC", - "T83x_FRAG_CYCLES_FPKQ_ACTIVE", - 
"T83x_FRAG_CYCLES_VERT", - "T83x_FRAG_CYCLES_TRISETUP", - "T83x_FRAG_CYCLES_EZS_ACTIVE", - "T83x_FRAG_THREADS", - "T83x_FRAG_DUMMY_THREADS", - "T83x_FRAG_QUADS_RAST", - "T83x_FRAG_QUADS_EZS_TEST", - "T83x_FRAG_QUADS_EZS_KILLED", - "T83x_FRAG_THREADS_LZS_TEST", - "T83x_FRAG_THREADS_LZS_KILLED", - "T83x_FRAG_CYCLES_NO_TILE", - "T83x_FRAG_NUM_TILES", - "T83x_FRAG_TRANS_ELIM", - "T83x_COMPUTE_ACTIVE", - "T83x_COMPUTE_TASKS", - "T83x_COMPUTE_THREADS", - "T83x_COMPUTE_CYCLES_DESC", - "T83x_TRIPIPE_ACTIVE", - "T83x_ARITH_WORDS", - "T83x_ARITH_CYCLES_REG", - "T83x_ARITH_CYCLES_L0", - "T83x_ARITH_FRAG_DEPEND", - "T83x_LS_WORDS", - "T83x_LS_ISSUES", - "T83x_LS_REISSUE_ATTR", - "T83x_LS_REISSUES_VARY", - "T83x_LS_VARY_RV_MISS", - "T83x_LS_VARY_RV_HIT", - "T83x_LS_NO_UNPARK", - "T83x_TEX_WORDS", - "T83x_TEX_BUBBLES", - "T83x_TEX_WORDS_L0", - "T83x_TEX_WORDS_DESC", - "T83x_TEX_ISSUES", - "T83x_TEX_RECIRC_FMISS", - "T83x_TEX_RECIRC_DESC", - "T83x_TEX_RECIRC_MULTI", - "T83x_TEX_RECIRC_PMISS", - "T83x_TEX_RECIRC_CONF", - "T83x_LSC_READ_HITS", - "T83x_LSC_READ_OP", - "T83x_LSC_WRITE_HITS", - "T83x_LSC_WRITE_OP", - "T83x_LSC_ATOMIC_HITS", - "T83x_LSC_ATOMIC_OP", - "T83x_LSC_LINE_FETCHES", - "T83x_LSC_DIRTY_LINE", - "T83x_LSC_SNOOPS", - "T83x_AXI_TLB_STALL", - "T83x_AXI_TLB_MISS", - "T83x_AXI_TLB_TRANSACTION", - "T83x_LS_TLB_MISS", - "T83x_LS_TLB_HIT", - "T83x_AXI_BEATS_READ", - "T83x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T83x_MMU_HIT", - "T83x_MMU_NEW_MISS", - "T83x_MMU_REPLAY_FULL", - "T83x_MMU_REPLAY_MISS", - "T83x_MMU_TABLE_WALK", - "T83x_MMU_REQUESTS", - "", - "", - "T83x_UTLB_HIT", - "T83x_UTLB_NEW_MISS", - "T83x_UTLB_REPLAY_FULL", - "T83x_UTLB_REPLAY_MISS", - "T83x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T83x_L2_EXT_WRITE_BEATS", - "T83x_L2_EXT_READ_BEATS", - "T83x_L2_ANY_LOOKUP", - "T83x_L2_READ_LOOKUP", - "T83x_L2_SREAD_LOOKUP", - "T83x_L2_READ_REPLAY", - "T83x_L2_READ_SNOOP", - "T83x_L2_READ_HIT", - "T83x_L2_CLEAN_MISS", - "T83x_L2_WRITE_LOOKUP", - "T83x_L2_SWRITE_LOOKUP", - "T83x_L2_WRITE_REPLAY", - "T83x_L2_WRITE_SNOOP", - "T83x_L2_WRITE_HIT", - "T83x_L2_EXT_READ_FULL", - "", - "T83x_L2_EXT_WRITE_FULL", - "T83x_L2_EXT_R_W_HAZARD", - "T83x_L2_EXT_READ", - "T83x_L2_EXT_READ_LINE", - "T83x_L2_EXT_WRITE", - "T83x_L2_EXT_WRITE_LINE", - "T83x_L2_EXT_WRITE_SMALL", - "T83x_L2_EXT_BARRIER", - "T83x_L2_EXT_AR_STALL", - "T83x_L2_EXT_R_BUF_FULL", - "T83x_L2_EXT_RD_BUF_FULL", - "T83x_L2_EXT_R_RAW", - "T83x_L2_EXT_W_STALL", - "T83x_L2_EXT_W_BUF_FULL", - "T83x_L2_EXT_R_BUF_FULL", - "T83x_L2_TAG_HAZARD", - "T83x_L2_SNOOP_FULL", - "T83x_L2_REPLAY_FULL" -}; - -static const char * const hardware_counters_mali_t86x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T86x_MESSAGES_SENT", - "T86x_MESSAGES_RECEIVED", - "T86x_GPU_ACTIVE", - "T86x_IRQ_ACTIVE", - "T86x_JS0_JOBS", - "T86x_JS0_TASKS", - "T86x_JS0_ACTIVE", - "", - "T86x_JS0_WAIT_READ", - "T86x_JS0_WAIT_ISSUE", - "T86x_JS0_WAIT_DEPEND", - "T86x_JS0_WAIT_FINISH", - "T86x_JS1_JOBS", - "T86x_JS1_TASKS", - "T86x_JS1_ACTIVE", - "", - "T86x_JS1_WAIT_READ", - "T86x_JS1_WAIT_ISSUE", - "T86x_JS1_WAIT_DEPEND", - "T86x_JS1_WAIT_FINISH", - "T86x_JS2_JOBS", - "T86x_JS2_TASKS", - "T86x_JS2_ACTIVE", - "", - "T86x_JS2_WAIT_READ", - "T86x_JS2_WAIT_ISSUE", - "T86x_JS2_WAIT_DEPEND", - "T86x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - 
"", - - /*Tiler */ - "", - "", - "", - "T86x_TI_JOBS_PROCESSED", - "T86x_TI_TRIANGLES", - "T86x_TI_QUADS", - "T86x_TI_POLYGONS", - "T86x_TI_POINTS", - "T86x_TI_LINES", - "T86x_TI_VCACHE_HIT", - "T86x_TI_VCACHE_MISS", - "T86x_TI_FRONT_FACING", - "T86x_TI_BACK_FACING", - "T86x_TI_PRIM_VISIBLE", - "T86x_TI_PRIM_CULLED", - "T86x_TI_PRIM_CLIPPED", - "T86x_TI_LEVEL0", - "T86x_TI_LEVEL1", - "T86x_TI_LEVEL2", - "T86x_TI_LEVEL3", - "T86x_TI_LEVEL4", - "T86x_TI_LEVEL5", - "T86x_TI_LEVEL6", - "T86x_TI_LEVEL7", - "T86x_TI_COMMAND_1", - "T86x_TI_COMMAND_2", - "T86x_TI_COMMAND_3", - "T86x_TI_COMMAND_4", - "T86x_TI_COMMAND_5_7", - "T86x_TI_COMMAND_8_15", - "T86x_TI_COMMAND_16_63", - "T86x_TI_COMMAND_64", - "T86x_TI_COMPRESS_IN", - "T86x_TI_COMPRESS_OUT", - "T86x_TI_COMPRESS_FLUSH", - "T86x_TI_TIMESTAMPS", - "T86x_TI_PCACHE_HIT", - "T86x_TI_PCACHE_MISS", - "T86x_TI_PCACHE_LINE", - "T86x_TI_PCACHE_STALL", - "T86x_TI_WRBUF_HIT", - "T86x_TI_WRBUF_MISS", - "T86x_TI_WRBUF_LINE", - "T86x_TI_WRBUF_PARTIAL", - "T86x_TI_WRBUF_STALL", - "T86x_TI_ACTIVE", - "T86x_TI_LOADING_DESC", - "T86x_TI_INDEX_WAIT", - "T86x_TI_INDEX_RANGE_WAIT", - "T86x_TI_VERTEX_WAIT", - "T86x_TI_PCACHE_WAIT", - "T86x_TI_WRBUF_WAIT", - "T86x_TI_BUS_READ", - "T86x_TI_BUS_WRITE", - "", - "", - "", - "", - "", - "T86x_TI_UTLB_HIT", - "T86x_TI_UTLB_NEW_MISS", - "T86x_TI_UTLB_REPLAY_FULL", - "T86x_TI_UTLB_REPLAY_MISS", - "T86x_TI_UTLB_STALL", - - /* Shader Core */ - "", - "", - "", - "", - "T86x_FRAG_ACTIVE", - "T86x_FRAG_PRIMITIVES", - "T86x_FRAG_PRIMITIVES_DROPPED", - "T86x_FRAG_CYCLES_DESC", - "T86x_FRAG_CYCLES_FPKQ_ACTIVE", - "T86x_FRAG_CYCLES_VERT", - "T86x_FRAG_CYCLES_TRISETUP", - "T86x_FRAG_CYCLES_EZS_ACTIVE", - "T86x_FRAG_THREADS", - "T86x_FRAG_DUMMY_THREADS", - "T86x_FRAG_QUADS_RAST", - "T86x_FRAG_QUADS_EZS_TEST", - "T86x_FRAG_QUADS_EZS_KILLED", - "T86x_FRAG_THREADS_LZS_TEST", - "T86x_FRAG_THREADS_LZS_KILLED", - "T86x_FRAG_CYCLES_NO_TILE", - "T86x_FRAG_NUM_TILES", - "T86x_FRAG_TRANS_ELIM", - "T86x_COMPUTE_ACTIVE", - "T86x_COMPUTE_TASKS", - "T86x_COMPUTE_THREADS", - "T86x_COMPUTE_CYCLES_DESC", - "T86x_TRIPIPE_ACTIVE", - "T86x_ARITH_WORDS", - "T86x_ARITH_CYCLES_REG", - "T86x_ARITH_CYCLES_L0", - "T86x_ARITH_FRAG_DEPEND", - "T86x_LS_WORDS", - "T86x_LS_ISSUES", - "T86x_LS_REISSUE_ATTR", - "T86x_LS_REISSUES_VARY", - "T86x_LS_VARY_RV_MISS", - "T86x_LS_VARY_RV_HIT", - "T86x_LS_NO_UNPARK", - "T86x_TEX_WORDS", - "T86x_TEX_BUBBLES", - "T86x_TEX_WORDS_L0", - "T86x_TEX_WORDS_DESC", - "T86x_TEX_ISSUES", - "T86x_TEX_RECIRC_FMISS", - "T86x_TEX_RECIRC_DESC", - "T86x_TEX_RECIRC_MULTI", - "T86x_TEX_RECIRC_PMISS", - "T86x_TEX_RECIRC_CONF", - "T86x_LSC_READ_HITS", - "T86x_LSC_READ_OP", - "T86x_LSC_WRITE_HITS", - "T86x_LSC_WRITE_OP", - "T86x_LSC_ATOMIC_HITS", - "T86x_LSC_ATOMIC_OP", - "T86x_LSC_LINE_FETCHES", - "T86x_LSC_DIRTY_LINE", - "T86x_LSC_SNOOPS", - "T86x_AXI_TLB_STALL", - "T86x_AXI_TLB_MISS", - "T86x_AXI_TLB_TRANSACTION", - "T86x_LS_TLB_MISS", - "T86x_LS_TLB_HIT", - "T86x_AXI_BEATS_READ", - "T86x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T86x_MMU_HIT", - "T86x_MMU_NEW_MISS", - "T86x_MMU_REPLAY_FULL", - "T86x_MMU_REPLAY_MISS", - "T86x_MMU_TABLE_WALK", - "T86x_MMU_REQUESTS", - "", - "", - "T86x_UTLB_HIT", - "T86x_UTLB_NEW_MISS", - "T86x_UTLB_REPLAY_FULL", - "T86x_UTLB_REPLAY_MISS", - "T86x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T86x_L2_EXT_WRITE_BEATS", - "T86x_L2_EXT_READ_BEATS", - "T86x_L2_ANY_LOOKUP", - "T86x_L2_READ_LOOKUP", - "T86x_L2_SREAD_LOOKUP", - 
"T86x_L2_READ_REPLAY", - "T86x_L2_READ_SNOOP", - "T86x_L2_READ_HIT", - "T86x_L2_CLEAN_MISS", - "T86x_L2_WRITE_LOOKUP", - "T86x_L2_SWRITE_LOOKUP", - "T86x_L2_WRITE_REPLAY", - "T86x_L2_WRITE_SNOOP", - "T86x_L2_WRITE_HIT", - "T86x_L2_EXT_READ_FULL", - "", - "T86x_L2_EXT_WRITE_FULL", - "T86x_L2_EXT_R_W_HAZARD", - "T86x_L2_EXT_READ", - "T86x_L2_EXT_READ_LINE", - "T86x_L2_EXT_WRITE", - "T86x_L2_EXT_WRITE_LINE", - "T86x_L2_EXT_WRITE_SMALL", - "T86x_L2_EXT_BARRIER", - "T86x_L2_EXT_AR_STALL", - "T86x_L2_EXT_R_BUF_FULL", - "T86x_L2_EXT_RD_BUF_FULL", - "T86x_L2_EXT_R_RAW", - "T86x_L2_EXT_W_STALL", - "T86x_L2_EXT_W_BUF_FULL", - "T86x_L2_EXT_R_BUF_FULL", - "T86x_L2_TAG_HAZARD", - "T86x_L2_SNOOP_FULL", - "T86x_L2_REPLAY_FULL" -}; - -static const char * const hardware_counters_mali_t88x[] = { - /* Job Manager */ - "", - "", - "", - "", - "T88x_MESSAGES_SENT", - "T88x_MESSAGES_RECEIVED", - "T88x_GPU_ACTIVE", - "T88x_IRQ_ACTIVE", - "T88x_JS0_JOBS", - "T88x_JS0_TASKS", - "T88x_JS0_ACTIVE", - "", - "T88x_JS0_WAIT_READ", - "T88x_JS0_WAIT_ISSUE", - "T88x_JS0_WAIT_DEPEND", - "T88x_JS0_WAIT_FINISH", - "T88x_JS1_JOBS", - "T88x_JS1_TASKS", - "T88x_JS1_ACTIVE", - "", - "T88x_JS1_WAIT_READ", - "T88x_JS1_WAIT_ISSUE", - "T88x_JS1_WAIT_DEPEND", - "T88x_JS1_WAIT_FINISH", - "T88x_JS2_JOBS", - "T88x_JS2_TASKS", - "T88x_JS2_ACTIVE", - "", - "T88x_JS2_WAIT_READ", - "T88x_JS2_WAIT_ISSUE", - "T88x_JS2_WAIT_DEPEND", - "T88x_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /*Tiler */ - "", - "", - "", - "T88x_TI_JOBS_PROCESSED", - "T88x_TI_TRIANGLES", - "T88x_TI_QUADS", - "T88x_TI_POLYGONS", - "T88x_TI_POINTS", - "T88x_TI_LINES", - "T88x_TI_VCACHE_HIT", - "T88x_TI_VCACHE_MISS", - "T88x_TI_FRONT_FACING", - "T88x_TI_BACK_FACING", - "T88x_TI_PRIM_VISIBLE", - "T88x_TI_PRIM_CULLED", - "T88x_TI_PRIM_CLIPPED", - "T88x_TI_LEVEL0", - "T88x_TI_LEVEL1", - "T88x_TI_LEVEL2", - "T88x_TI_LEVEL3", - "T88x_TI_LEVEL4", - "T88x_TI_LEVEL5", - "T88x_TI_LEVEL6", - "T88x_TI_LEVEL7", - "T88x_TI_COMMAND_1", - "T88x_TI_COMMAND_2", - "T88x_TI_COMMAND_3", - "T88x_TI_COMMAND_4", - "T88x_TI_COMMAND_5_7", - "T88x_TI_COMMAND_8_15", - "T88x_TI_COMMAND_16_63", - "T88x_TI_COMMAND_64", - "T88x_TI_COMPRESS_IN", - "T88x_TI_COMPRESS_OUT", - "T88x_TI_COMPRESS_FLUSH", - "T88x_TI_TIMESTAMPS", - "T88x_TI_PCACHE_HIT", - "T88x_TI_PCACHE_MISS", - "T88x_TI_PCACHE_LINE", - "T88x_TI_PCACHE_STALL", - "T88x_TI_WRBUF_HIT", - "T88x_TI_WRBUF_MISS", - "T88x_TI_WRBUF_LINE", - "T88x_TI_WRBUF_PARTIAL", - "T88x_TI_WRBUF_STALL", - "T88x_TI_ACTIVE", - "T88x_TI_LOADING_DESC", - "T88x_TI_INDEX_WAIT", - "T88x_TI_INDEX_RANGE_WAIT", - "T88x_TI_VERTEX_WAIT", - "T88x_TI_PCACHE_WAIT", - "T88x_TI_WRBUF_WAIT", - "T88x_TI_BUS_READ", - "T88x_TI_BUS_WRITE", - "", - "", - "", - "", - "", - "T88x_TI_UTLB_HIT", - "T88x_TI_UTLB_NEW_MISS", - "T88x_TI_UTLB_REPLAY_FULL", - "T88x_TI_UTLB_REPLAY_MISS", - "T88x_TI_UTLB_STALL", - - /* Shader Core */ - "", - "", - "", - "", - "T88x_FRAG_ACTIVE", - "T88x_FRAG_PRIMITIVES", - "T88x_FRAG_PRIMITIVES_DROPPED", - "T88x_FRAG_CYCLES_DESC", - "T88x_FRAG_CYCLES_FPKQ_ACTIVE", - "T88x_FRAG_CYCLES_VERT", - "T88x_FRAG_CYCLES_TRISETUP", - "T88x_FRAG_CYCLES_EZS_ACTIVE", - "T88x_FRAG_THREADS", - "T88x_FRAG_DUMMY_THREADS", - "T88x_FRAG_QUADS_RAST", - "T88x_FRAG_QUADS_EZS_TEST", - "T88x_FRAG_QUADS_EZS_KILLED", - "T88x_FRAG_THREADS_LZS_TEST", - "T88x_FRAG_THREADS_LZS_KILLED", - "T88x_FRAG_CYCLES_NO_TILE", - 
"T88x_FRAG_NUM_TILES", - "T88x_FRAG_TRANS_ELIM", - "T88x_COMPUTE_ACTIVE", - "T88x_COMPUTE_TASKS", - "T88x_COMPUTE_THREADS", - "T88x_COMPUTE_CYCLES_DESC", - "T88x_TRIPIPE_ACTIVE", - "T88x_ARITH_WORDS", - "T88x_ARITH_CYCLES_REG", - "T88x_ARITH_CYCLES_L0", - "T88x_ARITH_FRAG_DEPEND", - "T88x_LS_WORDS", - "T88x_LS_ISSUES", - "T88x_LS_REISSUE_ATTR", - "T88x_LS_REISSUES_VARY", - "T88x_LS_VARY_RV_MISS", - "T88x_LS_VARY_RV_HIT", - "T88x_LS_NO_UNPARK", - "T88x_TEX_WORDS", - "T88x_TEX_BUBBLES", - "T88x_TEX_WORDS_L0", - "T88x_TEX_WORDS_DESC", - "T88x_TEX_ISSUES", - "T88x_TEX_RECIRC_FMISS", - "T88x_TEX_RECIRC_DESC", - "T88x_TEX_RECIRC_MULTI", - "T88x_TEX_RECIRC_PMISS", - "T88x_TEX_RECIRC_CONF", - "T88x_LSC_READ_HITS", - "T88x_LSC_READ_OP", - "T88x_LSC_WRITE_HITS", - "T88x_LSC_WRITE_OP", - "T88x_LSC_ATOMIC_HITS", - "T88x_LSC_ATOMIC_OP", - "T88x_LSC_LINE_FETCHES", - "T88x_LSC_DIRTY_LINE", - "T88x_LSC_SNOOPS", - "T88x_AXI_TLB_STALL", - "T88x_AXI_TLB_MISS", - "T88x_AXI_TLB_TRANSACTION", - "T88x_LS_TLB_MISS", - "T88x_LS_TLB_HIT", - "T88x_AXI_BEATS_READ", - "T88x_AXI_BEATS_WRITTEN", - - /*L2 and MMU */ - "", - "", - "", - "", - "T88x_MMU_HIT", - "T88x_MMU_NEW_MISS", - "T88x_MMU_REPLAY_FULL", - "T88x_MMU_REPLAY_MISS", - "T88x_MMU_TABLE_WALK", - "T88x_MMU_REQUESTS", - "", - "", - "T88x_UTLB_HIT", - "T88x_UTLB_NEW_MISS", - "T88x_UTLB_REPLAY_FULL", - "T88x_UTLB_REPLAY_MISS", - "T88x_UTLB_STALL", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "T88x_L2_EXT_WRITE_BEATS", - "T88x_L2_EXT_READ_BEATS", - "T88x_L2_ANY_LOOKUP", - "T88x_L2_READ_LOOKUP", - "T88x_L2_SREAD_LOOKUP", - "T88x_L2_READ_REPLAY", - "T88x_L2_READ_SNOOP", - "T88x_L2_READ_HIT", - "T88x_L2_CLEAN_MISS", - "T88x_L2_WRITE_LOOKUP", - "T88x_L2_SWRITE_LOOKUP", - "T88x_L2_WRITE_REPLAY", - "T88x_L2_WRITE_SNOOP", - "T88x_L2_WRITE_HIT", - "T88x_L2_EXT_READ_FULL", - "", - "T88x_L2_EXT_WRITE_FULL", - "T88x_L2_EXT_R_W_HAZARD", - "T88x_L2_EXT_READ", - "T88x_L2_EXT_READ_LINE", - "T88x_L2_EXT_WRITE", - "T88x_L2_EXT_WRITE_LINE", - "T88x_L2_EXT_WRITE_SMALL", - "T88x_L2_EXT_BARRIER", - "T88x_L2_EXT_AR_STALL", - "T88x_L2_EXT_R_BUF_FULL", - "T88x_L2_EXT_RD_BUF_FULL", - "T88x_L2_EXT_R_RAW", - "T88x_L2_EXT_W_STALL", - "T88x_L2_EXT_W_BUF_FULL", - "T88x_L2_EXT_R_BUF_FULL", - "T88x_L2_TAG_HAZARD", - "T88x_L2_SNOOP_FULL", - "T88x_L2_REPLAY_FULL" -}; - -#include "mali_kbase_gator_hwcnt_names_tmix.h" - -#include "mali_kbase_gator_hwcnt_names_thex.h" - -#include "mali_kbase_gator_hwcnt_names_tsix.h" - -#include "mali_kbase_gator_hwcnt_names_tnox.h" - -#include "mali_kbase_gator_hwcnt_names_tgox.h" - -#include "mali_kbase_gator_hwcnt_names_tkax.h" - -#include "mali_kbase_gator_hwcnt_names_ttrx.h" - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tgox.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tgox.h deleted file mode 100755 index 72b5266622a9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tgox.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. - */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ - -static const char * const hardware_counters_mali_tGOx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TGOx_MESSAGES_SENT", - "TGOx_MESSAGES_RECEIVED", - "TGOx_GPU_ACTIVE", - "TGOx_IRQ_ACTIVE", - "TGOx_JS0_JOBS", - "TGOx_JS0_TASKS", - "TGOx_JS0_ACTIVE", - "", - "TGOx_JS0_WAIT_READ", - "TGOx_JS0_WAIT_ISSUE", - "TGOx_JS0_WAIT_DEPEND", - "TGOx_JS0_WAIT_FINISH", - "TGOx_JS1_JOBS", - "TGOx_JS1_TASKS", - "TGOx_JS1_ACTIVE", - "", - "TGOx_JS1_WAIT_READ", - "TGOx_JS1_WAIT_ISSUE", - "TGOx_JS1_WAIT_DEPEND", - "TGOx_JS1_WAIT_FINISH", - "TGOx_JS2_JOBS", - "TGOx_JS2_TASKS", - "TGOx_JS2_ACTIVE", - "", - "TGOx_JS2_WAIT_READ", - "TGOx_JS2_WAIT_ISSUE", - "TGOx_JS2_WAIT_DEPEND", - "TGOx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TGOx_TILER_ACTIVE", - "TGOx_JOBS_PROCESSED", - "TGOx_TRIANGLES", - "TGOx_LINES", - "TGOx_POINTS", - "TGOx_FRONT_FACING", - "TGOx_BACK_FACING", - "TGOx_PRIM_VISIBLE", - "TGOx_PRIM_CULLED", - "TGOx_PRIM_CLIPPED", - "TGOx_PRIM_SAT_CULLED", - "TGOx_BIN_ALLOC_INIT", - "TGOx_BIN_ALLOC_OVERFLOW", - "TGOx_BUS_READ", - "", - "TGOx_BUS_WRITE", - "TGOx_LOADING_DESC", - "TGOx_IDVS_POS_SHAD_REQ", - "TGOx_IDVS_POS_SHAD_WAIT", - "TGOx_IDVS_POS_SHAD_STALL", - "TGOx_IDVS_POS_FIFO_FULL", - "TGOx_PREFETCH_STALL", - "TGOx_VCACHE_HIT", - "TGOx_VCACHE_MISS", - "TGOx_VCACHE_LINE_WAIT", - "TGOx_VFETCH_POS_READ_WAIT", - "TGOx_VFETCH_VERTEX_WAIT", - "TGOx_VFETCH_STALL", - "TGOx_PRIMASSY_STALL", - "TGOx_BBOX_GEN_STALL", - "TGOx_IDVS_VBU_HIT", - "TGOx_IDVS_VBU_MISS", - "TGOx_IDVS_VBU_LINE_DEALLOCATE", - "TGOx_IDVS_VAR_SHAD_REQ", - "TGOx_IDVS_VAR_SHAD_STALL", - "TGOx_BINNER_STALL", - "TGOx_ITER_STALL", - "TGOx_COMPRESS_MISS", - "TGOx_COMPRESS_STALL", - "TGOx_PCACHE_HIT", - "TGOx_PCACHE_MISS", - "TGOx_PCACHE_MISS_STALL", - "TGOx_PCACHE_EVICT_STALL", - "TGOx_PMGR_PTR_WR_STALL", - "TGOx_PMGR_PTR_RD_STALL", - "TGOx_PMGR_CMD_WR_STALL", - "TGOx_WRBUF_ACTIVE", - "TGOx_WRBUF_HIT", - "TGOx_WRBUF_MISS", - "TGOx_WRBUF_NO_FREE_LINE_STALL", - "TGOx_WRBUF_NO_AXI_ID_STALL", - "TGOx_WRBUF_AXI_STALL", - "", - "", - "", - "TGOx_UTLB_TRANS", - "TGOx_UTLB_TRANS_HIT", - "TGOx_UTLB_TRANS_STALL", - "TGOx_UTLB_TRANS_MISS_DELAY", - "TGOx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TGOx_FRAG_ACTIVE", - "TGOx_FRAG_PRIMITIVES", - "TGOx_FRAG_PRIM_RAST", - "TGOx_FRAG_FPK_ACTIVE", - "TGOx_FRAG_STARVING", - "TGOx_FRAG_WARPS", - "TGOx_FRAG_PARTIAL_WARPS", - "TGOx_FRAG_QUADS_RAST", - "TGOx_FRAG_QUADS_EZS_TEST", - "TGOx_FRAG_QUADS_EZS_UPDATE", - "TGOx_FRAG_QUADS_EZS_KILL", - "TGOx_FRAG_LZS_TEST", - "TGOx_FRAG_LZS_KILL", - "TGOx_WARP_REG_SIZE_64", - "TGOx_FRAG_PTILES", - "TGOx_FRAG_TRANS_ELIM", - "TGOx_QUAD_FPK_KILLER", - "TGOx_FULL_QUAD_WARPS", - "TGOx_COMPUTE_ACTIVE", - "TGOx_COMPUTE_TASKS", - "TGOx_COMPUTE_WARPS", - "TGOx_COMPUTE_STARVING", - "TGOx_EXEC_CORE_ACTIVE", - 
"TGOx_EXEC_ACTIVE", - "TGOx_EXEC_INSTR_COUNT", - "TGOx_EXEC_INSTR_DIVERGED", - "TGOx_EXEC_INSTR_STARVING", - "TGOx_ARITH_INSTR_SINGLE_FMA", - "TGOx_ARITH_INSTR_DOUBLE", - "TGOx_ARITH_INSTR_MSG", - "TGOx_ARITH_INSTR_MSG_ONLY", - "TGOx_TEX_MSGI_NUM_QUADS", - "TGOx_TEX_DFCH_NUM_PASSES", - "TGOx_TEX_DFCH_NUM_PASSES_MISS", - "TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TGOx_TEX_TFCH_NUM_LINES_FETCHED", - "TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TGOx_TEX_TFCH_NUM_OPERATIONS", - "TGOx_TEX_FILT_NUM_OPERATIONS", - "TGOx_LS_MEM_READ_FULL", - "TGOx_LS_MEM_READ_SHORT", - "TGOx_LS_MEM_WRITE_FULL", - "TGOx_LS_MEM_WRITE_SHORT", - "TGOx_LS_MEM_ATOMIC", - "TGOx_VARY_INSTR", - "TGOx_VARY_SLOT_32", - "TGOx_VARY_SLOT_16", - "TGOx_ATTR_INSTR", - "TGOx_ARITH_INSTR_FP_MUL", - "TGOx_BEATS_RD_FTC", - "TGOx_BEATS_RD_FTC_EXT", - "TGOx_BEATS_RD_LSC", - "TGOx_BEATS_RD_LSC_EXT", - "TGOx_BEATS_RD_TEX", - "TGOx_BEATS_RD_TEX_EXT", - "TGOx_BEATS_RD_OTHER", - "TGOx_BEATS_WR_LSC_WB", - "TGOx_BEATS_WR_TIB", - "TGOx_BEATS_WR_LSC_OTHER", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TGOx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TGOx_L2_RD_MSG_IN", - "TGOx_L2_RD_MSG_IN_STALL", - "TGOx_L2_WR_MSG_IN", - "TGOx_L2_WR_MSG_IN_STALL", - "TGOx_L2_SNP_MSG_IN", - "TGOx_L2_SNP_MSG_IN_STALL", - "TGOx_L2_RD_MSG_OUT", - "TGOx_L2_RD_MSG_OUT_STALL", - "TGOx_L2_WR_MSG_OUT", - "TGOx_L2_ANY_LOOKUP", - "TGOx_L2_READ_LOOKUP", - "TGOx_L2_WRITE_LOOKUP", - "TGOx_L2_EXT_SNOOP_LOOKUP", - "TGOx_L2_EXT_READ", - "TGOx_L2_EXT_READ_NOSNP", - "TGOx_L2_EXT_READ_UNIQUE", - "TGOx_L2_EXT_READ_BEATS", - "TGOx_L2_EXT_AR_STALL", - "TGOx_L2_EXT_AR_CNT_Q1", - "TGOx_L2_EXT_AR_CNT_Q2", - "TGOx_L2_EXT_AR_CNT_Q3", - "TGOx_L2_EXT_RRESP_0_127", - "TGOx_L2_EXT_RRESP_128_191", - "TGOx_L2_EXT_RRESP_192_255", - "TGOx_L2_EXT_RRESP_256_319", - "TGOx_L2_EXT_RRESP_320_383", - "TGOx_L2_EXT_WRITE", - "TGOx_L2_EXT_WRITE_NOSNP_FULL", - "TGOx_L2_EXT_WRITE_NOSNP_PTL", - "TGOx_L2_EXT_WRITE_SNP_FULL", - "TGOx_L2_EXT_WRITE_SNP_PTL", - "TGOx_L2_EXT_WRITE_BEATS", - "TGOx_L2_EXT_W_STALL", - "TGOx_L2_EXT_AW_CNT_Q1", - "TGOx_L2_EXT_AW_CNT_Q2", - "TGOx_L2_EXT_AW_CNT_Q3", - "TGOx_L2_EXT_SNOOP", - "TGOx_L2_EXT_SNOOP_STALL", - "TGOx_L2_EXT_SNOOP_RESP_CLEAN", - "TGOx_L2_EXT_SNOOP_RESP_DATA", - "TGOx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TGOX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_thex.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_thex.h deleted file mode 100755 index e24e91ab1ca4..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_thex.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. - */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_THEX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_THEX_H_ - -static const char * const hardware_counters_mali_tHEx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "THEx_MESSAGES_SENT", - "THEx_MESSAGES_RECEIVED", - "THEx_GPU_ACTIVE", - "THEx_IRQ_ACTIVE", - "THEx_JS0_JOBS", - "THEx_JS0_TASKS", - "THEx_JS0_ACTIVE", - "", - "THEx_JS0_WAIT_READ", - "THEx_JS0_WAIT_ISSUE", - "THEx_JS0_WAIT_DEPEND", - "THEx_JS0_WAIT_FINISH", - "THEx_JS1_JOBS", - "THEx_JS1_TASKS", - "THEx_JS1_ACTIVE", - "", - "THEx_JS1_WAIT_READ", - "THEx_JS1_WAIT_ISSUE", - "THEx_JS1_WAIT_DEPEND", - "THEx_JS1_WAIT_FINISH", - "THEx_JS2_JOBS", - "THEx_JS2_TASKS", - "THEx_JS2_ACTIVE", - "", - "THEx_JS2_WAIT_READ", - "THEx_JS2_WAIT_ISSUE", - "THEx_JS2_WAIT_DEPEND", - "THEx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "THEx_TILER_ACTIVE", - "THEx_JOBS_PROCESSED", - "THEx_TRIANGLES", - "THEx_LINES", - "THEx_POINTS", - "THEx_FRONT_FACING", - "THEx_BACK_FACING", - "THEx_PRIM_VISIBLE", - "THEx_PRIM_CULLED", - "THEx_PRIM_CLIPPED", - "THEx_PRIM_SAT_CULLED", - "THEx_BIN_ALLOC_INIT", - "THEx_BIN_ALLOC_OVERFLOW", - "THEx_BUS_READ", - "", - "THEx_BUS_WRITE", - "THEx_LOADING_DESC", - "THEx_IDVS_POS_SHAD_REQ", - "THEx_IDVS_POS_SHAD_WAIT", - "THEx_IDVS_POS_SHAD_STALL", - "THEx_IDVS_POS_FIFO_FULL", - "THEx_PREFETCH_STALL", - "THEx_VCACHE_HIT", - "THEx_VCACHE_MISS", - "THEx_VCACHE_LINE_WAIT", - "THEx_VFETCH_POS_READ_WAIT", - "THEx_VFETCH_VERTEX_WAIT", - "THEx_VFETCH_STALL", - "THEx_PRIMASSY_STALL", - "THEx_BBOX_GEN_STALL", - "THEx_IDVS_VBU_HIT", - "THEx_IDVS_VBU_MISS", - "THEx_IDVS_VBU_LINE_DEALLOCATE", - "THEx_IDVS_VAR_SHAD_REQ", - "THEx_IDVS_VAR_SHAD_STALL", - "THEx_BINNER_STALL", - "THEx_ITER_STALL", - "THEx_COMPRESS_MISS", - "THEx_COMPRESS_STALL", - "THEx_PCACHE_HIT", - "THEx_PCACHE_MISS", - "THEx_PCACHE_MISS_STALL", - "THEx_PCACHE_EVICT_STALL", - "THEx_PMGR_PTR_WR_STALL", - "THEx_PMGR_PTR_RD_STALL", - "THEx_PMGR_CMD_WR_STALL", - "THEx_WRBUF_ACTIVE", - "THEx_WRBUF_HIT", - "THEx_WRBUF_MISS", - "THEx_WRBUF_NO_FREE_LINE_STALL", - "THEx_WRBUF_NO_AXI_ID_STALL", - "THEx_WRBUF_AXI_STALL", - "", - "", - "", - "THEx_UTLB_TRANS", - "THEx_UTLB_TRANS_HIT", - "THEx_UTLB_TRANS_STALL", - "THEx_UTLB_TRANS_MISS_DELAY", - "THEx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "THEx_FRAG_ACTIVE", - "THEx_FRAG_PRIMITIVES", - "THEx_FRAG_PRIM_RAST", - "THEx_FRAG_FPK_ACTIVE", - "THEx_FRAG_STARVING", - "THEx_FRAG_WARPS", - "THEx_FRAG_PARTIAL_WARPS", - "THEx_FRAG_QUADS_RAST", - "THEx_FRAG_QUADS_EZS_TEST", - "THEx_FRAG_QUADS_EZS_UPDATE", - "THEx_FRAG_QUADS_EZS_KILL", - "THEx_FRAG_LZS_TEST", - "THEx_FRAG_LZS_KILL", - "", - "THEx_FRAG_PTILES", - "THEx_FRAG_TRANS_ELIM", - "THEx_QUAD_FPK_KILLER", - "", - "THEx_COMPUTE_ACTIVE", - "THEx_COMPUTE_TASKS", - "THEx_COMPUTE_WARPS", - "THEx_COMPUTE_STARVING", - "THEx_EXEC_CORE_ACTIVE", - "THEx_EXEC_ACTIVE", - "THEx_EXEC_INSTR_COUNT", - "THEx_EXEC_INSTR_DIVERGED", - "THEx_EXEC_INSTR_STARVING", - "THEx_ARITH_INSTR_SINGLE_FMA", - "THEx_ARITH_INSTR_DOUBLE", - "THEx_ARITH_INSTR_MSG", - "THEx_ARITH_INSTR_MSG_ONLY", - "THEx_TEX_INSTR", - "THEx_TEX_INSTR_MIPMAP", - 
"THEx_TEX_INSTR_COMPRESSED", - "THEx_TEX_INSTR_3D", - "THEx_TEX_INSTR_TRILINEAR", - "THEx_TEX_COORD_ISSUE", - "THEx_TEX_COORD_STALL", - "THEx_TEX_STARVE_CACHE", - "THEx_TEX_STARVE_FILTER", - "THEx_LS_MEM_READ_FULL", - "THEx_LS_MEM_READ_SHORT", - "THEx_LS_MEM_WRITE_FULL", - "THEx_LS_MEM_WRITE_SHORT", - "THEx_LS_MEM_ATOMIC", - "THEx_VARY_INSTR", - "THEx_VARY_SLOT_32", - "THEx_VARY_SLOT_16", - "THEx_ATTR_INSTR", - "THEx_ARITH_INSTR_FP_MUL", - "THEx_BEATS_RD_FTC", - "THEx_BEATS_RD_FTC_EXT", - "THEx_BEATS_RD_LSC", - "THEx_BEATS_RD_LSC_EXT", - "THEx_BEATS_RD_TEX", - "THEx_BEATS_RD_TEX_EXT", - "THEx_BEATS_RD_OTHER", - "THEx_BEATS_WR_LSC", - "THEx_BEATS_WR_TIB", - "", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "THEx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "THEx_L2_RD_MSG_IN", - "THEx_L2_RD_MSG_IN_STALL", - "THEx_L2_WR_MSG_IN", - "THEx_L2_WR_MSG_IN_STALL", - "THEx_L2_SNP_MSG_IN", - "THEx_L2_SNP_MSG_IN_STALL", - "THEx_L2_RD_MSG_OUT", - "THEx_L2_RD_MSG_OUT_STALL", - "THEx_L2_WR_MSG_OUT", - "THEx_L2_ANY_LOOKUP", - "THEx_L2_READ_LOOKUP", - "THEx_L2_WRITE_LOOKUP", - "THEx_L2_EXT_SNOOP_LOOKUP", - "THEx_L2_EXT_READ", - "THEx_L2_EXT_READ_NOSNP", - "THEx_L2_EXT_READ_UNIQUE", - "THEx_L2_EXT_READ_BEATS", - "THEx_L2_EXT_AR_STALL", - "THEx_L2_EXT_AR_CNT_Q1", - "THEx_L2_EXT_AR_CNT_Q2", - "THEx_L2_EXT_AR_CNT_Q3", - "THEx_L2_EXT_RRESP_0_127", - "THEx_L2_EXT_RRESP_128_191", - "THEx_L2_EXT_RRESP_192_255", - "THEx_L2_EXT_RRESP_256_319", - "THEx_L2_EXT_RRESP_320_383", - "THEx_L2_EXT_WRITE", - "THEx_L2_EXT_WRITE_NOSNP_FULL", - "THEx_L2_EXT_WRITE_NOSNP_PTL", - "THEx_L2_EXT_WRITE_SNP_FULL", - "THEx_L2_EXT_WRITE_SNP_PTL", - "THEx_L2_EXT_WRITE_BEATS", - "THEx_L2_EXT_W_STALL", - "THEx_L2_EXT_AW_CNT_Q1", - "THEx_L2_EXT_AW_CNT_Q2", - "THEx_L2_EXT_AW_CNT_Q3", - "THEx_L2_EXT_SNOOP", - "THEx_L2_EXT_SNOOP_STALL", - "THEx_L2_EXT_SNOOP_RESP_CLEAN", - "THEx_L2_EXT_SNOOP_RESP_DATA", - "THEx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_THEX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tkax.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tkax.h deleted file mode 100755 index 73db45c232f1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tkax.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. 
- */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ - -static const char * const hardware_counters_mali_tKAx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TKAx_MESSAGES_SENT", - "TKAx_MESSAGES_RECEIVED", - "TKAx_GPU_ACTIVE", - "TKAx_IRQ_ACTIVE", - "TKAx_JS0_JOBS", - "TKAx_JS0_TASKS", - "TKAx_JS0_ACTIVE", - "", - "TKAx_JS0_WAIT_READ", - "TKAx_JS0_WAIT_ISSUE", - "TKAx_JS0_WAIT_DEPEND", - "TKAx_JS0_WAIT_FINISH", - "TKAx_JS1_JOBS", - "TKAx_JS1_TASKS", - "TKAx_JS1_ACTIVE", - "", - "TKAx_JS1_WAIT_READ", - "TKAx_JS1_WAIT_ISSUE", - "TKAx_JS1_WAIT_DEPEND", - "TKAx_JS1_WAIT_FINISH", - "TKAx_JS2_JOBS", - "TKAx_JS2_TASKS", - "TKAx_JS2_ACTIVE", - "", - "TKAx_JS2_WAIT_READ", - "TKAx_JS2_WAIT_ISSUE", - "TKAx_JS2_WAIT_DEPEND", - "TKAx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TKAx_TILER_ACTIVE", - "TKAx_JOBS_PROCESSED", - "TKAx_TRIANGLES", - "TKAx_LINES", - "TKAx_POINTS", - "TKAx_FRONT_FACING", - "TKAx_BACK_FACING", - "TKAx_PRIM_VISIBLE", - "TKAx_PRIM_CULLED", - "TKAx_PRIM_CLIPPED", - "TKAx_PRIM_SAT_CULLED", - "TKAx_BIN_ALLOC_INIT", - "TKAx_BIN_ALLOC_OVERFLOW", - "TKAx_BUS_READ", - "", - "TKAx_BUS_WRITE", - "TKAx_LOADING_DESC", - "TKAx_IDVS_POS_SHAD_REQ", - "TKAx_IDVS_POS_SHAD_WAIT", - "TKAx_IDVS_POS_SHAD_STALL", - "TKAx_IDVS_POS_FIFO_FULL", - "TKAx_PREFETCH_STALL", - "TKAx_VCACHE_HIT", - "TKAx_VCACHE_MISS", - "TKAx_VCACHE_LINE_WAIT", - "TKAx_VFETCH_POS_READ_WAIT", - "TKAx_VFETCH_VERTEX_WAIT", - "TKAx_VFETCH_STALL", - "TKAx_PRIMASSY_STALL", - "TKAx_BBOX_GEN_STALL", - "TKAx_IDVS_VBU_HIT", - "TKAx_IDVS_VBU_MISS", - "TKAx_IDVS_VBU_LINE_DEALLOCATE", - "TKAx_IDVS_VAR_SHAD_REQ", - "TKAx_IDVS_VAR_SHAD_STALL", - "TKAx_BINNER_STALL", - "TKAx_ITER_STALL", - "TKAx_COMPRESS_MISS", - "TKAx_COMPRESS_STALL", - "TKAx_PCACHE_HIT", - "TKAx_PCACHE_MISS", - "TKAx_PCACHE_MISS_STALL", - "TKAx_PCACHE_EVICT_STALL", - "TKAx_PMGR_PTR_WR_STALL", - "TKAx_PMGR_PTR_RD_STALL", - "TKAx_PMGR_CMD_WR_STALL", - "TKAx_WRBUF_ACTIVE", - "TKAx_WRBUF_HIT", - "TKAx_WRBUF_MISS", - "TKAx_WRBUF_NO_FREE_LINE_STALL", - "TKAx_WRBUF_NO_AXI_ID_STALL", - "TKAx_WRBUF_AXI_STALL", - "", - "", - "", - "TKAx_UTLB_TRANS", - "TKAx_UTLB_TRANS_HIT", - "TKAx_UTLB_TRANS_STALL", - "TKAx_UTLB_TRANS_MISS_DELAY", - "TKAx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TKAx_FRAG_ACTIVE", - "TKAx_FRAG_PRIMITIVES", - "TKAx_FRAG_PRIM_RAST", - "TKAx_FRAG_FPK_ACTIVE", - "TKAx_FRAG_STARVING", - "TKAx_FRAG_WARPS", - "TKAx_FRAG_PARTIAL_WARPS", - "TKAx_FRAG_QUADS_RAST", - "TKAx_FRAG_QUADS_EZS_TEST", - "TKAx_FRAG_QUADS_EZS_UPDATE", - "TKAx_FRAG_QUADS_EZS_KILL", - "TKAx_FRAG_LZS_TEST", - "TKAx_FRAG_LZS_KILL", - "TKAx_WARP_REG_SIZE_64", - "TKAx_FRAG_PTILES", - "TKAx_FRAG_TRANS_ELIM", - "TKAx_QUAD_FPK_KILLER", - "TKAx_FULL_QUAD_WARPS", - "TKAx_COMPUTE_ACTIVE", - "TKAx_COMPUTE_TASKS", - "TKAx_COMPUTE_WARPS", - "TKAx_COMPUTE_STARVING", - "TKAx_EXEC_CORE_ACTIVE", - "TKAx_EXEC_ACTIVE", - "TKAx_EXEC_INSTR_COUNT", - "TKAx_EXEC_INSTR_DIVERGED", - "TKAx_EXEC_INSTR_STARVING", - "TKAx_ARITH_INSTR_SINGLE_FMA", - "TKAx_ARITH_INSTR_DOUBLE", - "TKAx_ARITH_INSTR_MSG", - "TKAx_ARITH_INSTR_MSG_ONLY", - "TKAx_TEX_MSGI_NUM_QUADS", - "TKAx_TEX_DFCH_NUM_PASSES", - "TKAx_TEX_DFCH_NUM_PASSES_MISS", - "TKAx_TEX_DFCH_NUM_PASSES_MIP_MAP", - 
"TKAx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TKAx_TEX_TFCH_NUM_LINES_FETCHED", - "TKAx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TKAx_TEX_TFCH_NUM_OPERATIONS", - "TKAx_TEX_FILT_NUM_OPERATIONS", - "TKAx_LS_MEM_READ_FULL", - "TKAx_LS_MEM_READ_SHORT", - "TKAx_LS_MEM_WRITE_FULL", - "TKAx_LS_MEM_WRITE_SHORT", - "TKAx_LS_MEM_ATOMIC", - "TKAx_VARY_INSTR", - "TKAx_VARY_SLOT_32", - "TKAx_VARY_SLOT_16", - "TKAx_ATTR_INSTR", - "TKAx_ARITH_INSTR_FP_MUL", - "TKAx_BEATS_RD_FTC", - "TKAx_BEATS_RD_FTC_EXT", - "TKAx_BEATS_RD_LSC", - "TKAx_BEATS_RD_LSC_EXT", - "TKAx_BEATS_RD_TEX", - "TKAx_BEATS_RD_TEX_EXT", - "TKAx_BEATS_RD_OTHER", - "TKAx_BEATS_WR_LSC_OTHER", - "TKAx_BEATS_WR_TIB", - "TKAx_BEATS_WR_LSC_WB", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TKAx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TKAx_L2_RD_MSG_IN", - "TKAx_L2_RD_MSG_IN_STALL", - "TKAx_L2_WR_MSG_IN", - "TKAx_L2_WR_MSG_IN_STALL", - "TKAx_L2_SNP_MSG_IN", - "TKAx_L2_SNP_MSG_IN_STALL", - "TKAx_L2_RD_MSG_OUT", - "TKAx_L2_RD_MSG_OUT_STALL", - "TKAx_L2_WR_MSG_OUT", - "TKAx_L2_ANY_LOOKUP", - "TKAx_L2_READ_LOOKUP", - "TKAx_L2_WRITE_LOOKUP", - "TKAx_L2_EXT_SNOOP_LOOKUP", - "TKAx_L2_EXT_READ", - "TKAx_L2_EXT_READ_NOSNP", - "TKAx_L2_EXT_READ_UNIQUE", - "TKAx_L2_EXT_READ_BEATS", - "TKAx_L2_EXT_AR_STALL", - "TKAx_L2_EXT_AR_CNT_Q1", - "TKAx_L2_EXT_AR_CNT_Q2", - "TKAx_L2_EXT_AR_CNT_Q3", - "TKAx_L2_EXT_RRESP_0_127", - "TKAx_L2_EXT_RRESP_128_191", - "TKAx_L2_EXT_RRESP_192_255", - "TKAx_L2_EXT_RRESP_256_319", - "TKAx_L2_EXT_RRESP_320_383", - "TKAx_L2_EXT_WRITE", - "TKAx_L2_EXT_WRITE_NOSNP_FULL", - "TKAx_L2_EXT_WRITE_NOSNP_PTL", - "TKAx_L2_EXT_WRITE_SNP_FULL", - "TKAx_L2_EXT_WRITE_SNP_PTL", - "TKAx_L2_EXT_WRITE_BEATS", - "TKAx_L2_EXT_W_STALL", - "TKAx_L2_EXT_AW_CNT_Q1", - "TKAx_L2_EXT_AW_CNT_Q2", - "TKAx_L2_EXT_AW_CNT_Q3", - "TKAx_L2_EXT_SNOOP", - "TKAx_L2_EXT_SNOOP_STALL", - "TKAx_L2_EXT_SNOOP_RESP_CLEAN", - "TKAx_L2_EXT_SNOOP_RESP_DATA", - "TKAx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TKAX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tmix.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tmix.h deleted file mode 100755 index 63eac50e0cc7..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tmix.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. 
- */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ - -static const char * const hardware_counters_mali_tMIx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TMIx_MESSAGES_SENT", - "TMIx_MESSAGES_RECEIVED", - "TMIx_GPU_ACTIVE", - "TMIx_IRQ_ACTIVE", - "TMIx_JS0_JOBS", - "TMIx_JS0_TASKS", - "TMIx_JS0_ACTIVE", - "", - "TMIx_JS0_WAIT_READ", - "TMIx_JS0_WAIT_ISSUE", - "TMIx_JS0_WAIT_DEPEND", - "TMIx_JS0_WAIT_FINISH", - "TMIx_JS1_JOBS", - "TMIx_JS1_TASKS", - "TMIx_JS1_ACTIVE", - "", - "TMIx_JS1_WAIT_READ", - "TMIx_JS1_WAIT_ISSUE", - "TMIx_JS1_WAIT_DEPEND", - "TMIx_JS1_WAIT_FINISH", - "TMIx_JS2_JOBS", - "TMIx_JS2_TASKS", - "TMIx_JS2_ACTIVE", - "", - "TMIx_JS2_WAIT_READ", - "TMIx_JS2_WAIT_ISSUE", - "TMIx_JS2_WAIT_DEPEND", - "TMIx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TMIx_TILER_ACTIVE", - "TMIx_JOBS_PROCESSED", - "TMIx_TRIANGLES", - "TMIx_LINES", - "TMIx_POINTS", - "TMIx_FRONT_FACING", - "TMIx_BACK_FACING", - "TMIx_PRIM_VISIBLE", - "TMIx_PRIM_CULLED", - "TMIx_PRIM_CLIPPED", - "TMIx_PRIM_SAT_CULLED", - "TMIx_BIN_ALLOC_INIT", - "TMIx_BIN_ALLOC_OVERFLOW", - "TMIx_BUS_READ", - "", - "TMIx_BUS_WRITE", - "TMIx_LOADING_DESC", - "TMIx_IDVS_POS_SHAD_REQ", - "TMIx_IDVS_POS_SHAD_WAIT", - "TMIx_IDVS_POS_SHAD_STALL", - "TMIx_IDVS_POS_FIFO_FULL", - "TMIx_PREFETCH_STALL", - "TMIx_VCACHE_HIT", - "TMIx_VCACHE_MISS", - "TMIx_VCACHE_LINE_WAIT", - "TMIx_VFETCH_POS_READ_WAIT", - "TMIx_VFETCH_VERTEX_WAIT", - "TMIx_VFETCH_STALL", - "TMIx_PRIMASSY_STALL", - "TMIx_BBOX_GEN_STALL", - "TMIx_IDVS_VBU_HIT", - "TMIx_IDVS_VBU_MISS", - "TMIx_IDVS_VBU_LINE_DEALLOCATE", - "TMIx_IDVS_VAR_SHAD_REQ", - "TMIx_IDVS_VAR_SHAD_STALL", - "TMIx_BINNER_STALL", - "TMIx_ITER_STALL", - "TMIx_COMPRESS_MISS", - "TMIx_COMPRESS_STALL", - "TMIx_PCACHE_HIT", - "TMIx_PCACHE_MISS", - "TMIx_PCACHE_MISS_STALL", - "TMIx_PCACHE_EVICT_STALL", - "TMIx_PMGR_PTR_WR_STALL", - "TMIx_PMGR_PTR_RD_STALL", - "TMIx_PMGR_CMD_WR_STALL", - "TMIx_WRBUF_ACTIVE", - "TMIx_WRBUF_HIT", - "TMIx_WRBUF_MISS", - "TMIx_WRBUF_NO_FREE_LINE_STALL", - "TMIx_WRBUF_NO_AXI_ID_STALL", - "TMIx_WRBUF_AXI_STALL", - "", - "", - "", - "TMIx_UTLB_TRANS", - "TMIx_UTLB_TRANS_HIT", - "TMIx_UTLB_TRANS_STALL", - "TMIx_UTLB_TRANS_MISS_DELAY", - "TMIx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TMIx_FRAG_ACTIVE", - "TMIx_FRAG_PRIMITIVES", - "TMIx_FRAG_PRIM_RAST", - "TMIx_FRAG_FPK_ACTIVE", - "TMIx_FRAG_STARVING", - "TMIx_FRAG_WARPS", - "TMIx_FRAG_PARTIAL_WARPS", - "TMIx_FRAG_QUADS_RAST", - "TMIx_FRAG_QUADS_EZS_TEST", - "TMIx_FRAG_QUADS_EZS_UPDATE", - "TMIx_FRAG_QUADS_EZS_KILL", - "TMIx_FRAG_LZS_TEST", - "TMIx_FRAG_LZS_KILL", - "", - "TMIx_FRAG_PTILES", - "TMIx_FRAG_TRANS_ELIM", - "TMIx_QUAD_FPK_KILLER", - "", - "TMIx_COMPUTE_ACTIVE", - "TMIx_COMPUTE_TASKS", - "TMIx_COMPUTE_WARPS", - "TMIx_COMPUTE_STARVING", - "TMIx_EXEC_CORE_ACTIVE", - "TMIx_EXEC_ACTIVE", - "TMIx_EXEC_INSTR_COUNT", - "TMIx_EXEC_INSTR_DIVERGED", - "TMIx_EXEC_INSTR_STARVING", - "TMIx_ARITH_INSTR_SINGLE_FMA", - "TMIx_ARITH_INSTR_DOUBLE", - "TMIx_ARITH_INSTR_MSG", - "TMIx_ARITH_INSTR_MSG_ONLY", - "TMIx_TEX_INSTR", - "TMIx_TEX_INSTR_MIPMAP", - "TMIx_TEX_INSTR_COMPRESSED", - "TMIx_TEX_INSTR_3D", - "TMIx_TEX_INSTR_TRILINEAR", - "TMIx_TEX_COORD_ISSUE", - "TMIx_TEX_COORD_STALL", - 
"TMIx_TEX_STARVE_CACHE", - "TMIx_TEX_STARVE_FILTER", - "TMIx_LS_MEM_READ_FULL", - "TMIx_LS_MEM_READ_SHORT", - "TMIx_LS_MEM_WRITE_FULL", - "TMIx_LS_MEM_WRITE_SHORT", - "TMIx_LS_MEM_ATOMIC", - "TMIx_VARY_INSTR", - "TMIx_VARY_SLOT_32", - "TMIx_VARY_SLOT_16", - "TMIx_ATTR_INSTR", - "TMIx_ARITH_INSTR_FP_MUL", - "TMIx_BEATS_RD_FTC", - "TMIx_BEATS_RD_FTC_EXT", - "TMIx_BEATS_RD_LSC", - "TMIx_BEATS_RD_LSC_EXT", - "TMIx_BEATS_RD_TEX", - "TMIx_BEATS_RD_TEX_EXT", - "TMIx_BEATS_RD_OTHER", - "TMIx_BEATS_WR_LSC", - "TMIx_BEATS_WR_TIB", - "", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TMIx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TMIx_L2_RD_MSG_IN", - "TMIx_L2_RD_MSG_IN_STALL", - "TMIx_L2_WR_MSG_IN", - "TMIx_L2_WR_MSG_IN_STALL", - "TMIx_L2_SNP_MSG_IN", - "TMIx_L2_SNP_MSG_IN_STALL", - "TMIx_L2_RD_MSG_OUT", - "TMIx_L2_RD_MSG_OUT_STALL", - "TMIx_L2_WR_MSG_OUT", - "TMIx_L2_ANY_LOOKUP", - "TMIx_L2_READ_LOOKUP", - "TMIx_L2_WRITE_LOOKUP", - "TMIx_L2_EXT_SNOOP_LOOKUP", - "TMIx_L2_EXT_READ", - "TMIx_L2_EXT_READ_NOSNP", - "TMIx_L2_EXT_READ_UNIQUE", - "TMIx_L2_EXT_READ_BEATS", - "TMIx_L2_EXT_AR_STALL", - "TMIx_L2_EXT_AR_CNT_Q1", - "TMIx_L2_EXT_AR_CNT_Q2", - "TMIx_L2_EXT_AR_CNT_Q3", - "TMIx_L2_EXT_RRESP_0_127", - "TMIx_L2_EXT_RRESP_128_191", - "TMIx_L2_EXT_RRESP_192_255", - "TMIx_L2_EXT_RRESP_256_319", - "TMIx_L2_EXT_RRESP_320_383", - "TMIx_L2_EXT_WRITE", - "TMIx_L2_EXT_WRITE_NOSNP_FULL", - "TMIx_L2_EXT_WRITE_NOSNP_PTL", - "TMIx_L2_EXT_WRITE_SNP_FULL", - "TMIx_L2_EXT_WRITE_SNP_PTL", - "TMIx_L2_EXT_WRITE_BEATS", - "TMIx_L2_EXT_W_STALL", - "TMIx_L2_EXT_AW_CNT_Q1", - "TMIx_L2_EXT_AW_CNT_Q2", - "TMIx_L2_EXT_AW_CNT_Q3", - "TMIx_L2_EXT_SNOOP", - "TMIx_L2_EXT_SNOOP_STALL", - "TMIx_L2_EXT_SNOOP_RESP_CLEAN", - "TMIx_L2_EXT_SNOOP_RESP_DATA", - "TMIx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TMIX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tnox.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tnox.h deleted file mode 100755 index 932663cfb6a9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tnox.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. 
- */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ - -static const char * const hardware_counters_mali_tNOx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TNOx_MESSAGES_SENT", - "TNOx_MESSAGES_RECEIVED", - "TNOx_GPU_ACTIVE", - "TNOx_IRQ_ACTIVE", - "TNOx_JS0_JOBS", - "TNOx_JS0_TASKS", - "TNOx_JS0_ACTIVE", - "", - "TNOx_JS0_WAIT_READ", - "TNOx_JS0_WAIT_ISSUE", - "TNOx_JS0_WAIT_DEPEND", - "TNOx_JS0_WAIT_FINISH", - "TNOx_JS1_JOBS", - "TNOx_JS1_TASKS", - "TNOx_JS1_ACTIVE", - "", - "TNOx_JS1_WAIT_READ", - "TNOx_JS1_WAIT_ISSUE", - "TNOx_JS1_WAIT_DEPEND", - "TNOx_JS1_WAIT_FINISH", - "TNOx_JS2_JOBS", - "TNOx_JS2_TASKS", - "TNOx_JS2_ACTIVE", - "", - "TNOx_JS2_WAIT_READ", - "TNOx_JS2_WAIT_ISSUE", - "TNOx_JS2_WAIT_DEPEND", - "TNOx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TNOx_TILER_ACTIVE", - "TNOx_JOBS_PROCESSED", - "TNOx_TRIANGLES", - "TNOx_LINES", - "TNOx_POINTS", - "TNOx_FRONT_FACING", - "TNOx_BACK_FACING", - "TNOx_PRIM_VISIBLE", - "TNOx_PRIM_CULLED", - "TNOx_PRIM_CLIPPED", - "TNOx_PRIM_SAT_CULLED", - "TNOx_BIN_ALLOC_INIT", - "TNOx_BIN_ALLOC_OVERFLOW", - "TNOx_BUS_READ", - "", - "TNOx_BUS_WRITE", - "TNOx_LOADING_DESC", - "TNOx_IDVS_POS_SHAD_REQ", - "TNOx_IDVS_POS_SHAD_WAIT", - "TNOx_IDVS_POS_SHAD_STALL", - "TNOx_IDVS_POS_FIFO_FULL", - "TNOx_PREFETCH_STALL", - "TNOx_VCACHE_HIT", - "TNOx_VCACHE_MISS", - "TNOx_VCACHE_LINE_WAIT", - "TNOx_VFETCH_POS_READ_WAIT", - "TNOx_VFETCH_VERTEX_WAIT", - "TNOx_VFETCH_STALL", - "TNOx_PRIMASSY_STALL", - "TNOx_BBOX_GEN_STALL", - "TNOx_IDVS_VBU_HIT", - "TNOx_IDVS_VBU_MISS", - "TNOx_IDVS_VBU_LINE_DEALLOCATE", - "TNOx_IDVS_VAR_SHAD_REQ", - "TNOx_IDVS_VAR_SHAD_STALL", - "TNOx_BINNER_STALL", - "TNOx_ITER_STALL", - "TNOx_COMPRESS_MISS", - "TNOx_COMPRESS_STALL", - "TNOx_PCACHE_HIT", - "TNOx_PCACHE_MISS", - "TNOx_PCACHE_MISS_STALL", - "TNOx_PCACHE_EVICT_STALL", - "TNOx_PMGR_PTR_WR_STALL", - "TNOx_PMGR_PTR_RD_STALL", - "TNOx_PMGR_CMD_WR_STALL", - "TNOx_WRBUF_ACTIVE", - "TNOx_WRBUF_HIT", - "TNOx_WRBUF_MISS", - "TNOx_WRBUF_NO_FREE_LINE_STALL", - "TNOx_WRBUF_NO_AXI_ID_STALL", - "TNOx_WRBUF_AXI_STALL", - "", - "", - "", - "TNOx_UTLB_TRANS", - "TNOx_UTLB_TRANS_HIT", - "TNOx_UTLB_TRANS_STALL", - "TNOx_UTLB_TRANS_MISS_DELAY", - "TNOx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TNOx_FRAG_ACTIVE", - "TNOx_FRAG_PRIMITIVES", - "TNOx_FRAG_PRIM_RAST", - "TNOx_FRAG_FPK_ACTIVE", - "TNOx_FRAG_STARVING", - "TNOx_FRAG_WARPS", - "TNOx_FRAG_PARTIAL_WARPS", - "TNOx_FRAG_QUADS_RAST", - "TNOx_FRAG_QUADS_EZS_TEST", - "TNOx_FRAG_QUADS_EZS_UPDATE", - "TNOx_FRAG_QUADS_EZS_KILL", - "TNOx_FRAG_LZS_TEST", - "TNOx_FRAG_LZS_KILL", - "TNOx_WARP_REG_SIZE_64", - "TNOx_FRAG_PTILES", - "TNOx_FRAG_TRANS_ELIM", - "TNOx_QUAD_FPK_KILLER", - "TNOx_FULL_QUAD_WARPS", - "TNOx_COMPUTE_ACTIVE", - "TNOx_COMPUTE_TASKS", - "TNOx_COMPUTE_WARPS", - "TNOx_COMPUTE_STARVING", - "TNOx_EXEC_CORE_ACTIVE", - "TNOx_EXEC_ACTIVE", - "TNOx_EXEC_INSTR_COUNT", - "TNOx_EXEC_INSTR_DIVERGED", - "TNOx_EXEC_INSTR_STARVING", - "TNOx_ARITH_INSTR_SINGLE_FMA", - "TNOx_ARITH_INSTR_DOUBLE", - "TNOx_ARITH_INSTR_MSG", - "TNOx_ARITH_INSTR_MSG_ONLY", - "TNOx_TEX_MSGI_NUM_QUADS", - "TNOx_TEX_DFCH_NUM_PASSES", - "TNOx_TEX_DFCH_NUM_PASSES_MISS", - "TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP", - 
"TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TNOx_TEX_TFCH_NUM_LINES_FETCHED", - "TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TNOx_TEX_TFCH_NUM_OPERATIONS", - "TNOx_TEX_FILT_NUM_OPERATIONS", - "TNOx_LS_MEM_READ_FULL", - "TNOx_LS_MEM_READ_SHORT", - "TNOx_LS_MEM_WRITE_FULL", - "TNOx_LS_MEM_WRITE_SHORT", - "TNOx_LS_MEM_ATOMIC", - "TNOx_VARY_INSTR", - "TNOx_VARY_SLOT_32", - "TNOx_VARY_SLOT_16", - "TNOx_ATTR_INSTR", - "TNOx_ARITH_INSTR_FP_MUL", - "TNOx_BEATS_RD_FTC", - "TNOx_BEATS_RD_FTC_EXT", - "TNOx_BEATS_RD_LSC", - "TNOx_BEATS_RD_LSC_EXT", - "TNOx_BEATS_RD_TEX", - "TNOx_BEATS_RD_TEX_EXT", - "TNOx_BEATS_RD_OTHER", - "TNOx_BEATS_WR_LSC_OTHER", - "TNOx_BEATS_WR_TIB", - "TNOx_BEATS_WR_LSC_WB", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TNOx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TNOx_L2_RD_MSG_IN", - "TNOx_L2_RD_MSG_IN_STALL", - "TNOx_L2_WR_MSG_IN", - "TNOx_L2_WR_MSG_IN_STALL", - "TNOx_L2_SNP_MSG_IN", - "TNOx_L2_SNP_MSG_IN_STALL", - "TNOx_L2_RD_MSG_OUT", - "TNOx_L2_RD_MSG_OUT_STALL", - "TNOx_L2_WR_MSG_OUT", - "TNOx_L2_ANY_LOOKUP", - "TNOx_L2_READ_LOOKUP", - "TNOx_L2_WRITE_LOOKUP", - "TNOx_L2_EXT_SNOOP_LOOKUP", - "TNOx_L2_EXT_READ", - "TNOx_L2_EXT_READ_NOSNP", - "TNOx_L2_EXT_READ_UNIQUE", - "TNOx_L2_EXT_READ_BEATS", - "TNOx_L2_EXT_AR_STALL", - "TNOx_L2_EXT_AR_CNT_Q1", - "TNOx_L2_EXT_AR_CNT_Q2", - "TNOx_L2_EXT_AR_CNT_Q3", - "TNOx_L2_EXT_RRESP_0_127", - "TNOx_L2_EXT_RRESP_128_191", - "TNOx_L2_EXT_RRESP_192_255", - "TNOx_L2_EXT_RRESP_256_319", - "TNOx_L2_EXT_RRESP_320_383", - "TNOx_L2_EXT_WRITE", - "TNOx_L2_EXT_WRITE_NOSNP_FULL", - "TNOx_L2_EXT_WRITE_NOSNP_PTL", - "TNOx_L2_EXT_WRITE_SNP_FULL", - "TNOx_L2_EXT_WRITE_SNP_PTL", - "TNOx_L2_EXT_WRITE_BEATS", - "TNOx_L2_EXT_W_STALL", - "TNOx_L2_EXT_AW_CNT_Q1", - "TNOx_L2_EXT_AW_CNT_Q2", - "TNOx_L2_EXT_AW_CNT_Q3", - "TNOx_L2_EXT_SNOOP", - "TNOx_L2_EXT_SNOOP_STALL", - "TNOx_L2_EXT_SNOOP_RESP_CLEAN", - "TNOx_L2_EXT_SNOOP_RESP_DATA", - "TNOx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TNOX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tsix.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tsix.h deleted file mode 100755 index b8dde32bc529..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_tsix.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. 
- */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ - -static const char * const hardware_counters_mali_tSIx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TSIx_MESSAGES_SENT", - "TSIx_MESSAGES_RECEIVED", - "TSIx_GPU_ACTIVE", - "TSIx_IRQ_ACTIVE", - "TSIx_JS0_JOBS", - "TSIx_JS0_TASKS", - "TSIx_JS0_ACTIVE", - "", - "TSIx_JS0_WAIT_READ", - "TSIx_JS0_WAIT_ISSUE", - "TSIx_JS0_WAIT_DEPEND", - "TSIx_JS0_WAIT_FINISH", - "TSIx_JS1_JOBS", - "TSIx_JS1_TASKS", - "TSIx_JS1_ACTIVE", - "", - "TSIx_JS1_WAIT_READ", - "TSIx_JS1_WAIT_ISSUE", - "TSIx_JS1_WAIT_DEPEND", - "TSIx_JS1_WAIT_FINISH", - "TSIx_JS2_JOBS", - "TSIx_JS2_TASKS", - "TSIx_JS2_ACTIVE", - "", - "TSIx_JS2_WAIT_READ", - "TSIx_JS2_WAIT_ISSUE", - "TSIx_JS2_WAIT_DEPEND", - "TSIx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TSIx_TILER_ACTIVE", - "TSIx_JOBS_PROCESSED", - "TSIx_TRIANGLES", - "TSIx_LINES", - "TSIx_POINTS", - "TSIx_FRONT_FACING", - "TSIx_BACK_FACING", - "TSIx_PRIM_VISIBLE", - "TSIx_PRIM_CULLED", - "TSIx_PRIM_CLIPPED", - "TSIx_PRIM_SAT_CULLED", - "TSIx_BIN_ALLOC_INIT", - "TSIx_BIN_ALLOC_OVERFLOW", - "TSIx_BUS_READ", - "", - "TSIx_BUS_WRITE", - "TSIx_LOADING_DESC", - "TSIx_IDVS_POS_SHAD_REQ", - "TSIx_IDVS_POS_SHAD_WAIT", - "TSIx_IDVS_POS_SHAD_STALL", - "TSIx_IDVS_POS_FIFO_FULL", - "TSIx_PREFETCH_STALL", - "TSIx_VCACHE_HIT", - "TSIx_VCACHE_MISS", - "TSIx_VCACHE_LINE_WAIT", - "TSIx_VFETCH_POS_READ_WAIT", - "TSIx_VFETCH_VERTEX_WAIT", - "TSIx_VFETCH_STALL", - "TSIx_PRIMASSY_STALL", - "TSIx_BBOX_GEN_STALL", - "TSIx_IDVS_VBU_HIT", - "TSIx_IDVS_VBU_MISS", - "TSIx_IDVS_VBU_LINE_DEALLOCATE", - "TSIx_IDVS_VAR_SHAD_REQ", - "TSIx_IDVS_VAR_SHAD_STALL", - "TSIx_BINNER_STALL", - "TSIx_ITER_STALL", - "TSIx_COMPRESS_MISS", - "TSIx_COMPRESS_STALL", - "TSIx_PCACHE_HIT", - "TSIx_PCACHE_MISS", - "TSIx_PCACHE_MISS_STALL", - "TSIx_PCACHE_EVICT_STALL", - "TSIx_PMGR_PTR_WR_STALL", - "TSIx_PMGR_PTR_RD_STALL", - "TSIx_PMGR_CMD_WR_STALL", - "TSIx_WRBUF_ACTIVE", - "TSIx_WRBUF_HIT", - "TSIx_WRBUF_MISS", - "TSIx_WRBUF_NO_FREE_LINE_STALL", - "TSIx_WRBUF_NO_AXI_ID_STALL", - "TSIx_WRBUF_AXI_STALL", - "", - "", - "", - "TSIx_UTLB_TRANS", - "TSIx_UTLB_TRANS_HIT", - "TSIx_UTLB_TRANS_STALL", - "TSIx_UTLB_TRANS_MISS_DELAY", - "TSIx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TSIx_FRAG_ACTIVE", - "TSIx_FRAG_PRIMITIVES", - "TSIx_FRAG_PRIM_RAST", - "TSIx_FRAG_FPK_ACTIVE", - "TSIx_FRAG_STARVING", - "TSIx_FRAG_WARPS", - "TSIx_FRAG_PARTIAL_WARPS", - "TSIx_FRAG_QUADS_RAST", - "TSIx_FRAG_QUADS_EZS_TEST", - "TSIx_FRAG_QUADS_EZS_UPDATE", - "TSIx_FRAG_QUADS_EZS_KILL", - "TSIx_FRAG_LZS_TEST", - "TSIx_FRAG_LZS_KILL", - "", - "TSIx_FRAG_PTILES", - "TSIx_FRAG_TRANS_ELIM", - "TSIx_QUAD_FPK_KILLER", - "", - "TSIx_COMPUTE_ACTIVE", - "TSIx_COMPUTE_TASKS", - "TSIx_COMPUTE_WARPS", - "TSIx_COMPUTE_STARVING", - "TSIx_EXEC_CORE_ACTIVE", - "TSIx_EXEC_ACTIVE", - "TSIx_EXEC_INSTR_COUNT", - "TSIx_EXEC_INSTR_DIVERGED", - "TSIx_EXEC_INSTR_STARVING", - "TSIx_ARITH_INSTR_SINGLE_FMA", - "TSIx_ARITH_INSTR_DOUBLE", - "TSIx_ARITH_INSTR_MSG", - "TSIx_ARITH_INSTR_MSG_ONLY", - "TSIx_TEX_MSGI_NUM_QUADS", - "TSIx_TEX_DFCH_NUM_PASSES", - "TSIx_TEX_DFCH_NUM_PASSES_MISS", - "TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP", - "TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - 
"TSIx_TEX_TFCH_NUM_LINES_FETCHED", - "TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TSIx_TEX_TFCH_NUM_OPERATIONS", - "TSIx_TEX_FILT_NUM_OPERATIONS", - "TSIx_LS_MEM_READ_FULL", - "TSIx_LS_MEM_READ_SHORT", - "TSIx_LS_MEM_WRITE_FULL", - "TSIx_LS_MEM_WRITE_SHORT", - "TSIx_LS_MEM_ATOMIC", - "TSIx_VARY_INSTR", - "TSIx_VARY_SLOT_32", - "TSIx_VARY_SLOT_16", - "TSIx_ATTR_INSTR", - "TSIx_ARITH_INSTR_FP_MUL", - "TSIx_BEATS_RD_FTC", - "TSIx_BEATS_RD_FTC_EXT", - "TSIx_BEATS_RD_LSC", - "TSIx_BEATS_RD_LSC_EXT", - "TSIx_BEATS_RD_TEX", - "TSIx_BEATS_RD_TEX_EXT", - "TSIx_BEATS_RD_OTHER", - "TSIx_BEATS_WR_LSC_OTHER", - "TSIx_BEATS_WR_TIB", - "TSIx_BEATS_WR_LSC_WB", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TSIx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TSIx_L2_RD_MSG_IN", - "TSIx_L2_RD_MSG_IN_STALL", - "TSIx_L2_WR_MSG_IN", - "TSIx_L2_WR_MSG_IN_STALL", - "TSIx_L2_SNP_MSG_IN", - "TSIx_L2_SNP_MSG_IN_STALL", - "TSIx_L2_RD_MSG_OUT", - "TSIx_L2_RD_MSG_OUT_STALL", - "TSIx_L2_WR_MSG_OUT", - "TSIx_L2_ANY_LOOKUP", - "TSIx_L2_READ_LOOKUP", - "TSIx_L2_WRITE_LOOKUP", - "TSIx_L2_EXT_SNOOP_LOOKUP", - "TSIx_L2_EXT_READ", - "TSIx_L2_EXT_READ_NOSNP", - "TSIx_L2_EXT_READ_UNIQUE", - "TSIx_L2_EXT_READ_BEATS", - "TSIx_L2_EXT_AR_STALL", - "TSIx_L2_EXT_AR_CNT_Q1", - "TSIx_L2_EXT_AR_CNT_Q2", - "TSIx_L2_EXT_AR_CNT_Q3", - "TSIx_L2_EXT_RRESP_0_127", - "TSIx_L2_EXT_RRESP_128_191", - "TSIx_L2_EXT_RRESP_192_255", - "TSIx_L2_EXT_RRESP_256_319", - "TSIx_L2_EXT_RRESP_320_383", - "TSIx_L2_EXT_WRITE", - "TSIx_L2_EXT_WRITE_NOSNP_FULL", - "TSIx_L2_EXT_WRITE_NOSNP_PTL", - "TSIx_L2_EXT_WRITE_SNP_FULL", - "TSIx_L2_EXT_WRITE_SNP_PTL", - "TSIx_L2_EXT_WRITE_BEATS", - "TSIx_L2_EXT_W_STALL", - "TSIx_L2_EXT_AW_CNT_Q1", - "TSIx_L2_EXT_AW_CNT_Q2", - "TSIx_L2_EXT_AW_CNT_Q3", - "TSIx_L2_EXT_SNOOP", - "TSIx_L2_EXT_SNOOP_STALL", - "TSIx_L2_EXT_SNOOP_RESP_CLEAN", - "TSIx_L2_EXT_SNOOP_RESP_DATA", - "TSIx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TSIX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_ttrx.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_ttrx.h deleted file mode 100755 index c1e315b0f534..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gator_hwcnt_names_ttrx.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * This header was autogenerated, it should not be edited. 
- */ - -#ifndef _KBASE_GATOR_HWCNT_NAMES_TTRX_H_ -#define _KBASE_GATOR_HWCNT_NAMES_TTRX_H_ - -static const char * const hardware_counters_mali_tTRx[] = { - /* Performance counters for the Job Manager */ - "", - "", - "", - "", - "TTRx_MESSAGES_SENT", - "TTRx_MESSAGES_RECEIVED", - "TTRx_GPU_ACTIVE", - "TTRx_IRQ_ACTIVE", - "TTRx_JS0_JOBS", - "TTRx_JS0_TASKS", - "TTRx_JS0_ACTIVE", - "", - "TTRx_JS0_WAIT_READ", - "TTRx_JS0_WAIT_ISSUE", - "TTRx_JS0_WAIT_DEPEND", - "TTRx_JS0_WAIT_FINISH", - "TTRx_JS1_JOBS", - "TTRx_JS1_TASKS", - "TTRx_JS1_ACTIVE", - "", - "TTRx_JS1_WAIT_READ", - "TTRx_JS1_WAIT_ISSUE", - "TTRx_JS1_WAIT_DEPEND", - "TTRx_JS1_WAIT_FINISH", - "TTRx_JS2_JOBS", - "TTRx_JS2_TASKS", - "TTRx_JS2_ACTIVE", - "", - "TTRx_JS2_WAIT_READ", - "TTRx_JS2_WAIT_ISSUE", - "TTRx_JS2_WAIT_DEPEND", - "TTRx_JS2_WAIT_FINISH", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - - /* Performance counters for the Tiler */ - "", - "", - "", - "", - "TTRx_TILER_ACTIVE", - "TTRx_JOBS_PROCESSED", - "TTRx_TRIANGLES", - "TTRx_LINES", - "TTRx_POINTS", - "TTRx_FRONT_FACING", - "TTRx_BACK_FACING", - "TTRx_PRIM_VISIBLE", - "TTRx_PRIM_CULLED", - "TTRx_PRIM_CLIPPED", - "TTRx_PRIM_SAT_CULLED", - "TTRx_BIN_ALLOC_INIT", - "TTRx_BIN_ALLOC_OVERFLOW", - "TTRx_BUS_READ", - "", - "TTRx_BUS_WRITE", - "TTRx_LOADING_DESC", - "TTRx_IDVS_POS_SHAD_REQ", - "TTRx_IDVS_POS_SHAD_WAIT", - "TTRx_IDVS_POS_SHAD_STALL", - "TTRx_IDVS_POS_FIFO_FULL", - "TTRx_PREFETCH_STALL", - "TTRx_VCACHE_HIT", - "TTRx_VCACHE_MISS", - "TTRx_VCACHE_LINE_WAIT", - "TTRx_VFETCH_POS_READ_WAIT", - "TTRx_VFETCH_VERTEX_WAIT", - "TTRx_VFETCH_STALL", - "TTRx_PRIMASSY_STALL", - "TTRx_BBOX_GEN_STALL", - "TTRx_IDVS_VBU_HIT", - "TTRx_IDVS_VBU_MISS", - "TTRx_IDVS_VBU_LINE_DEALLOCATE", - "TTRx_IDVS_VAR_SHAD_REQ", - "TTRx_IDVS_VAR_SHAD_STALL", - "TTRx_BINNER_STALL", - "TTRx_ITER_STALL", - "TTRx_COMPRESS_MISS", - "TTRx_COMPRESS_STALL", - "TTRx_PCACHE_HIT", - "TTRx_PCACHE_MISS", - "TTRx_PCACHE_MISS_STALL", - "TTRx_PCACHE_EVICT_STALL", - "TTRx_PMGR_PTR_WR_STALL", - "TTRx_PMGR_PTR_RD_STALL", - "TTRx_PMGR_CMD_WR_STALL", - "TTRx_WRBUF_ACTIVE", - "TTRx_WRBUF_HIT", - "TTRx_WRBUF_MISS", - "TTRx_WRBUF_NO_FREE_LINE_STALL", - "TTRx_WRBUF_NO_AXI_ID_STALL", - "TTRx_WRBUF_AXI_STALL", - "", - "", - "", - "TTRx_UTLB_TRANS", - "TTRx_UTLB_TRANS_HIT", - "TTRx_UTLB_TRANS_STALL", - "TTRx_UTLB_TRANS_MISS_DELAY", - "TTRx_UTLB_MMU_REQ", - - /* Performance counters for the Shader Core */ - "", - "", - "", - "", - "TTRx_FRAG_ACTIVE", - "TTRx_FRAG_PRIMITIVES", - "TTRx_FRAG_PRIM_RAST", - "TTRx_FRAG_FPK_ACTIVE", - "TTRx_FRAG_STARVING", - "TTRx_FRAG_WARPS", - "TTRx_FRAG_PARTIAL_WARPS", - "TTRx_FRAG_QUADS_RAST", - "TTRx_FRAG_QUADS_EZS_TEST", - "TTRx_FRAG_QUADS_EZS_UPDATE", - "TTRx_FRAG_QUADS_EZS_KILL", - "TTRx_FRAG_LZS_TEST", - "TTRx_FRAG_LZS_KILL", - "TTRx_WARP_REG_SIZE_64", - "TTRx_FRAG_PTILES", - "TTRx_FRAG_TRANS_ELIM", - "TTRx_QUAD_FPK_KILLER", - "TTRx_FULL_QUAD_WARPS", - "TTRx_COMPUTE_ACTIVE", - "TTRx_COMPUTE_TASKS", - "TTRx_COMPUTE_WARPS", - "TTRx_COMPUTE_STARVING", - "TTRx_EXEC_CORE_ACTIVE", - "TTRx_EXEC_INSTR_FMA", - "TTRx_EXEC_INSTR_CVT", - "TTRx_EXEC_INSTR_SFU", - "TTRx_EXEC_INSTR_MSG", - "TTRx_EXEC_INSTR_DIVERGED", - "TTRx_EXEC_ICACHE_MISS", - "TTRx_EXEC_STARVE_ARITH", - "TTRx_CALL_BLEND_SHADER", - "TTRx_TEX_MSGI_NUM_QUADS", - "TTRx_TEX_DFCH_NUM_PASSES", - "TTRx_TEX_DFCH_NUM_PASSES_MISS", - "TTRx_TEX_DFCH_NUM_PASSES_MIP_MAP", - 
"TTRx_TEX_TIDX_NUM_SPLIT_MIP_MAP", - "TTRx_TEX_TFCH_NUM_LINES_FETCHED", - "TTRx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK", - "TTRx_TEX_TFCH_NUM_OPERATIONS", - "TTRx_TEX_FILT_NUM_OPERATIONS", - "TTRx_LS_MEM_READ_FULL", - "TTRx_LS_MEM_READ_SHORT", - "TTRx_LS_MEM_WRITE_FULL", - "TTRx_LS_MEM_WRITE_SHORT", - "TTRx_LS_MEM_ATOMIC", - "TTRx_VARY_INSTR", - "TTRx_VARY_SLOT_32", - "TTRx_VARY_SLOT_16", - "TTRx_ATTR_INSTR", - "TTRx_ARITH_INSTR_FP_MUL", - "TTRx_BEATS_RD_FTC", - "TTRx_BEATS_RD_FTC_EXT", - "TTRx_BEATS_RD_LSC", - "TTRx_BEATS_RD_LSC_EXT", - "TTRx_BEATS_RD_TEX", - "TTRx_BEATS_RD_TEX_EXT", - "TTRx_BEATS_RD_OTHER", - "TTRx_BEATS_WR_LSC_OTHER", - "TTRx_BEATS_WR_TIB", - "TTRx_BEATS_WR_LSC_WB", - - /* Performance counters for the Memory System */ - "", - "", - "", - "", - "TTRx_MMU_REQUESTS", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "TTRx_L2_RD_MSG_IN", - "TTRx_L2_RD_MSG_IN_STALL", - "TTRx_L2_WR_MSG_IN", - "TTRx_L2_WR_MSG_IN_STALL", - "TTRx_L2_SNP_MSG_IN", - "TTRx_L2_SNP_MSG_IN_STALL", - "TTRx_L2_RD_MSG_OUT", - "TTRx_L2_RD_MSG_OUT_STALL", - "TTRx_L2_WR_MSG_OUT", - "TTRx_L2_ANY_LOOKUP", - "TTRx_L2_READ_LOOKUP", - "TTRx_L2_WRITE_LOOKUP", - "TTRx_L2_EXT_SNOOP_LOOKUP", - "TTRx_L2_EXT_READ", - "TTRx_L2_EXT_READ_NOSNP", - "TTRx_L2_EXT_READ_UNIQUE", - "TTRx_L2_EXT_READ_BEATS", - "TTRx_L2_EXT_AR_STALL", - "TTRx_L2_EXT_AR_CNT_Q1", - "TTRx_L2_EXT_AR_CNT_Q2", - "TTRx_L2_EXT_AR_CNT_Q3", - "TTRx_L2_EXT_RRESP_0_127", - "TTRx_L2_EXT_RRESP_128_191", - "TTRx_L2_EXT_RRESP_192_255", - "TTRx_L2_EXT_RRESP_256_319", - "TTRx_L2_EXT_RRESP_320_383", - "TTRx_L2_EXT_WRITE", - "TTRx_L2_EXT_WRITE_NOSNP_FULL", - "TTRx_L2_EXT_WRITE_NOSNP_PTL", - "TTRx_L2_EXT_WRITE_SNP_FULL", - "TTRx_L2_EXT_WRITE_SNP_PTL", - "TTRx_L2_EXT_WRITE_BEATS", - "TTRx_L2_EXT_W_STALL", - "TTRx_L2_EXT_AW_CNT_Q1", - "TTRx_L2_EXT_AW_CNT_Q2", - "TTRx_L2_EXT_AW_CNT_Q3", - "TTRx_L2_EXT_SNOOP", - "TTRx_L2_EXT_SNOOP_STALL", - "TTRx_L2_EXT_SNOOP_RESP_CLEAN", - "TTRx_L2_EXT_SNOOP_RESP_DATA", - "TTRx_L2_EXT_SNOOP_INTERNAL", - "", - "", - "", - "", - "", - "", - "", -}; - -#endif /* _KBASE_GATOR_HWCNT_NAMES_TTRX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_id.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_id.h deleted file mode 100755 index d432f8e056c9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_id.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#ifndef _KBASE_GPU_ID_H_ -#define _KBASE_GPU_ID_H_ - -/* GPU_ID register */ -#define GPU_ID_VERSION_STATUS_SHIFT 0 -#define GPU_ID_VERSION_MINOR_SHIFT 4 -#define GPU_ID_VERSION_MAJOR_SHIFT 12 -#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 -#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT) -#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT) -#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT) -#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT) - -/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ -#define GPU_ID_PI_T60X 0x6956u -#define GPU_ID_PI_T62X 0x0620u -#define GPU_ID_PI_T76X 0x0750u -#define GPU_ID_PI_T72X 0x0720u -#define GPU_ID_PI_TFRX 0x0880u -#define GPU_ID_PI_T86X 0x0860u -#define GPU_ID_PI_T82X 0x0820u -#define GPU_ID_PI_T83X 0x0830u - -/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */ -#define GPU_ID_PI_NEW_FORMAT_START 0x1000 -#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \ - (product_id) >= \ - GPU_ID_PI_NEW_FORMAT_START) - -#define GPU_ID2_VERSION_STATUS_SHIFT 0 -#define GPU_ID2_VERSION_MINOR_SHIFT 4 -#define GPU_ID2_VERSION_MAJOR_SHIFT 12 -#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 -#define GPU_ID2_ARCH_REV_SHIFT 20 -#define GPU_ID2_ARCH_MINOR_SHIFT 24 -#define GPU_ID2_ARCH_MAJOR_SHIFT 28 -#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) -#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) -#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) -#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) -#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) -#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) -#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) -#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ - GPU_ID2_VERSION_MINOR | \ - GPU_ID2_VERSION_STATUS) - -/* Helper macro to create a partial GPU_ID (new format) that defines - a product ignoring its version. */ -#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ - ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ - (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ - (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) - -/* Helper macro to create a partial GPU_ID (new format) that specifies the - revision (major, minor, status) of a product */ -#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ - ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ - (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ - (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) - -/* Helper macro to create a complete GPU_ID (new format) */ -#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ - version_major, version_minor, version_status) \ - (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ - product_major) | \ - GPU_ID2_VERSION_MAKE(version_major, version_minor, \ - version_status)) - -/* Helper macro to create a partial GPU_ID (new format) that identifies - a particular GPU model by its arch_major and product_major. 
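The GPU_ID2 helpers in this header compose and decompose the 32-bit GPU_ID register from its arch, product and version fields. A small standalone sketch of how they combine, restating the shift values from this header; the concrete GPU used in main() is an arbitrary example, not a statement about real hardware:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* Shift values and helper macros restated from this header. */
#define GPU_ID2_VERSION_STATUS_SHIFT	0
#define GPU_ID2_VERSION_MINOR_SHIFT	4
#define GPU_ID2_VERSION_MAJOR_SHIFT	12
#define GPU_ID2_PRODUCT_MAJOR_SHIFT	16
#define GPU_ID2_ARCH_REV_SHIFT		20
#define GPU_ID2_ARCH_MINOR_SHIFT	24
#define GPU_ID2_ARCH_MAJOR_SHIFT	28

#define GPU_ID2_PRODUCT_MAJOR	(0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT)
#define GPU_ID2_ARCH_MAJOR	(0xFu << GPU_ID2_ARCH_MAJOR_SHIFT)
#define GPU_ID2_PRODUCT_MODEL	(GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)

#define GPU_ID2_MAKE(am, an, ar, pm, vM, vm, vs)		\
	((((u32)(am)) << GPU_ID2_ARCH_MAJOR_SHIFT) |		\
	 (((u32)(an)) << GPU_ID2_ARCH_MINOR_SHIFT) |		\
	 (((u32)(ar)) << GPU_ID2_ARCH_REV_SHIFT) |		\
	 (((u32)(pm)) << GPU_ID2_PRODUCT_MAJOR_SHIFT) |		\
	 (((u32)(vM)) << GPU_ID2_VERSION_MAJOR_SHIFT) |		\
	 (((u32)(vm)) << GPU_ID2_VERSION_MINOR_SHIFT) |		\
	 (((u32)(vs)) << GPU_ID2_VERSION_STATUS_SHIFT))

#define GPU_ID2_MODEL_MAKE(arch_major, product_major)		\
	((((u32)(arch_major)) << GPU_ID2_ARCH_MAJOR_SHIFT) |	\
	 (((u32)(product_major)) << GPU_ID2_PRODUCT_MAJOR_SHIFT))

int main(void)
{
	/* Example: arch 7.0 r0, product major 1, version r0p0 status 0. */
	u32 gpu_id = GPU_ID2_MAKE(7, 0, 0, 1, 0, 0, 0);
	u32 model = gpu_id & GPU_ID2_PRODUCT_MODEL;

	printf("gpu_id = 0x%08x\n", gpu_id);
	printf("matches MODEL_MAKE(7, 1): %s\n",
	       model == GPU_ID2_MODEL_MAKE(7, 1) ? "yes" : "no");
	return 0;
}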
*/ -#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ - ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ - (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) - -/* Strip off the non-relevant bits from a product_id value and make it suitable - for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU - model. */ -#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ - ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ - GPU_ID2_PRODUCT_MODEL) - -#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) -#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) -#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) -#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) -#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) -#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) -#define GPU_ID2_PRODUCT_TKAX GPU_ID2_MODEL_MAKE(8, 0) -#define GPU_ID2_PRODUCT_TBOX GPU_ID2_MODEL_MAKE(8, 2) -#define GPU_ID2_PRODUCT_TEGX GPU_ID2_MODEL_MAKE(8, 3) -#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) -#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) -#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) -#define GPU_ID2_PRODUCT_TULX GPU_ID2_MODEL_MAKE(10, 0) -#define GPU_ID2_PRODUCT_TIDX GPU_ID2_MODEL_MAKE(10, 3) -#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) - -/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ -#define GPU_ID_S_15DEV0 0x1 -#define GPU_ID_S_EAC 0x2 - -/* Helper macro to create a GPU_ID assuming valid values for id, major, - minor, status */ -#define GPU_ID_MAKE(id, major, minor, status) \ - ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ - (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ - (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) - -#endif /* _KBASE_GPU_ID_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_memory_debugfs.c deleted file mode 100755 index 514b065d4867..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_memory_debugfs.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include - -#ifdef CONFIG_DEBUG_FS -/** Show callback for the @c gpu_memory debugfs file. - * - * This function is called to get the contents of the @c gpu_memory debugfs - * file. This is a report of current gpu memory usage. 
- * - * @param sfile The debugfs entry - * @param data Data associated with the entry - * - * @return 0 if successfully prints data in debugfs entry file - * -1 if it encountered an error - */ - -static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) -{ - struct list_head *entry; - const struct list_head *kbdev_list; - - kbdev_list = kbase_dev_list_get(); - list_for_each(entry, kbdev_list) { - struct kbase_device *kbdev = NULL; - struct kbasep_kctx_list_element *element; - - kbdev = list_entry(entry, struct kbase_device, entry); - /* output the total memory usage and cap for this device */ - seq_printf(sfile, "%-16s %10u\n", - kbdev->devname, - atomic_read(&(kbdev->memdev.used_pages))); - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry(element, &kbdev->kctx_list, link) { - /* output the memory usage and cap for each kctx - * opened on this device */ - seq_printf(sfile, " %s-0x%p %10u\n", - "kctx", - element->kctx, - atomic_read(&(element->kctx->used_pages))); - } - mutex_unlock(&kbdev->kctx_list_lock); - } - kbase_dev_list_put(kbdev_list); - return 0; -} - -/* - * File operations related to debugfs entry for gpu_memory - */ -static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) -{ - return single_open(file, kbasep_gpu_memory_seq_show, NULL); -} - -static const struct file_operations kbasep_gpu_memory_debugfs_fops = { - .open = kbasep_gpu_memory_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* - * Initialize debugfs entry for gpu_memory - */ -void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_file("gpu_memory", S_IRUGO, - kbdev->mali_debugfs_directory, NULL, - &kbasep_gpu_memory_debugfs_fops); - return; -} - -#else -/* - * Stub functions for when debugfs is disabled - */ -void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) -{ - return; -} -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_memory_debugfs.h deleted file mode 100755 index 28a871a0da4f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpu_memory_debugfs.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_gpu_memory_debugfs.h - * Header file for gpu_memory entry in debugfs - * - */ - -#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H -#define _KBASE_GPU_MEMORY_DEBUGFS_H - -#include -#include - -/** - * @brief Initialize gpu_memory debugfs entry - */ -void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); - -#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops.c deleted file mode 100755 index 62ba105ca417..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops.c +++ /dev/null @@ -1,482 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Base kernel property query APIs - */ - -#include -#include -#include -#include -#include "mali_kbase_ioctl.h" -#include - -/** - * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. - * @value: The value from which to extract bits. - * @offset: The first bit to extract (0 being the LSB). - * @size: The number of bits to extract. - * - * Context: @offset + @size <= 32. - * - * Return: Bits [@offset, @offset + @size) from @value. - */ -/* from mali_cdsb.h */ -#define KBASE_UBFX32(value, offset, size) \ - (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) - -static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) -{ - struct mali_base_gpu_coherent_group *current_group; - u64 group_present; - u64 group_mask; - u64 first_set, first_set_prev; - u32 num_groups = 0; - - KBASE_DEBUG_ASSERT(NULL != props); - - props->coherency_info.coherency = props->raw_props.mem_features; - props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); - - if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { - /* Group is l2 coherent */ - group_present = props->raw_props.l2_present; - } else { - /* Group is l1 coherent */ - group_present = props->raw_props.shader_present; - } - - /* - * The coherent group mask can be computed from the l2 present - * register. - * - * For the coherent group n: - * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) - * where first_set is group_present with only its nth set-bit kept - * (i.e. the position from where a new group starts). - * - * For instance if the groups are l2 coherent and l2_present=0x0..01111: - * The first mask is: - * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) - * = (0x0..010 - 1) & ~(0x0..01 - 1) - * = 0x0..00f - * The second mask is: - * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) - * = (0x0..100 - 1) & ~(0x0..010 - 1) - * = 0x0..0f0 - * And so on until all the bits from group_present have been cleared - * (i.e. there is no group left). 
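The mask recurrence described in the comment above is easier to follow with concrete numbers. A standalone sketch of the same derivation performed by the loop that follows, using an assumed l2_present of 0x11 (two L2 slices at bits 0 and 4) and shader_present of 0xFF; both values are purely illustrative:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t group_present = 0x11;	/* stands in for l2_present */
	uint64_t shader_present = 0xFF;	/* assumed: eight shader cores */
	uint64_t first_set = group_present & ~(group_present - 1);
	unsigned int n = 0;

	while (group_present != 0) {
		uint64_t first_set_prev, group_mask;

		group_present -= first_set;	/* clear the current group bit */
		first_set_prev = first_set;
		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		/* Prints core_mask 0x0f for group 0 and 0xf0 for group 1. */
		printf("group %u: core_mask = 0x%llx\n", n++,
		       (unsigned long long)(group_mask & shader_present));
	}
	return 0;
}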
- */ - - current_group = props->coherency_info.group; - first_set = group_present & ~(group_present - 1); - - while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { - group_present -= first_set; /* Clear the current group bit */ - first_set_prev = first_set; - - first_set = group_present & ~(group_present - 1); - group_mask = (first_set - 1) & ~(first_set_prev - 1); - - /* Populate the coherent_group structure for each group */ - current_group->core_mask = group_mask & props->raw_props.shader_present; - current_group->num_cores = hweight64(current_group->core_mask); - - num_groups++; - current_group++; - } - - if (group_present != 0) - pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); - - props->coherency_info.num_groups = num_groups; -} - -/** - * kbase_gpuprops_get_props - Get the GPU configuration - * @gpu_props: The &base_gpu_props structure - * @kbdev: The &struct kbase_device structure for the device - * - * Fill the &base_gpu_props structure with values from the GPU configuration - * registers. Only the raw properties are filled in this function - */ -static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) -{ - struct kbase_gpuprops_regdump regdump; - int i; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != gpu_props); - - /* Dump relevant registers */ - kbase_backend_gpuprops_get(kbdev, ®dump); - - gpu_props->raw_props.gpu_id = regdump.gpu_id; - gpu_props->raw_props.tiler_features = regdump.tiler_features; - gpu_props->raw_props.mem_features = regdump.mem_features; - gpu_props->raw_props.mmu_features = regdump.mmu_features; - gpu_props->raw_props.l2_features = regdump.l2_features; - gpu_props->raw_props.core_features = regdump.core_features; - - gpu_props->raw_props.as_present = regdump.as_present; - gpu_props->raw_props.js_present = regdump.js_present; - gpu_props->raw_props.shader_present = - ((u64) regdump.shader_present_hi << 32) + - regdump.shader_present_lo; - gpu_props->raw_props.tiler_present = - ((u64) regdump.tiler_present_hi << 32) + - regdump.tiler_present_lo; - gpu_props->raw_props.l2_present = - ((u64) regdump.l2_present_hi << 32) + - regdump.l2_present_lo; - gpu_props->raw_props.stack_present = - ((u64) regdump.stack_present_hi << 32) + - regdump.stack_present_lo; - - for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) - gpu_props->raw_props.js_features[i] = regdump.js_features[i]; - - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; - - gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; - gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; - gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; - gpu_props->raw_props.thread_features = regdump.thread_features; - gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; -} - -void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props) -{ - gpu_props->core_props.version_status = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); - gpu_props->core_props.minor_revision = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); - gpu_props->core_props.major_revision = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); - gpu_props->core_props.product_id = - KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); -} - -/** - * kbase_gpuprops_calculate_props - Calculate the derived properties - * @gpu_props: The &base_gpu_props structure - 
* @kbdev: The &struct kbase_device structure for the device - * - * Fill the &base_gpu_props structure with values derived from the GPU - * configuration registers - */ -static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) -{ - int i; - - /* Populate the base_gpu_props structure */ - kbase_gpuprops_update_core_props_gpu_id(gpu_props); - gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; - gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; - gpu_props->core_props.num_exec_engines = - KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); - - for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) - gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; - - gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); - gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); - - /* Field with number of l2 slices is added to MEM_FEATURES register - * since t76x. Below code assumes that for older GPU reserved bits will - * be read as zero. */ - gpu_props->l2_props.num_l2_slices = - KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; - - gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); - gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); - - if (gpu_props->raw_props.thread_max_threads == 0) - gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; - else - gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; - - if (gpu_props->raw_props.thread_max_workgroup_size == 0) - gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; - else - gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; - - if (gpu_props->raw_props.thread_max_barrier_size == 0) - gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; - else - gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; - - if (gpu_props->raw_props.thread_tls_alloc == 0) - gpu_props->thread_props.tls_alloc = - gpu_props->thread_props.max_threads; - else - gpu_props->thread_props.tls_alloc = - gpu_props->raw_props.thread_tls_alloc; - - gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); - gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); - gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); - gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); - - /* If values are not specified, then use defaults */ - if (gpu_props->thread_props.max_registers == 0) { - gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; - gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; - gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; - } - /* Initialize the coherent_group structure for each group */ - kbase_gpuprops_construct_coherent_groups(gpu_props); -} - -void kbase_gpuprops_set(struct kbase_device *kbdev) -{ - struct kbase_gpu_props *gpu_props; - struct gpu_raw_gpu_props *raw; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - gpu_props = &kbdev->gpu_props; - raw = &gpu_props->props.raw_props; - - /* Initialize the base_gpu_props structure from the hardware */ - kbase_gpuprops_get_props(&gpu_props->props, kbdev); - - /* 
Populate the derived properties */ - kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); - - /* Populate kbase-only fields */ - gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); - gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); - - gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); - - gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); - gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); - - gpu_props->num_cores = hweight64(raw->shader_present); - gpu_props->num_core_groups = hweight64(raw->l2_present); - gpu_props->num_address_spaces = hweight32(raw->as_present); - gpu_props->num_job_slots = hweight32(raw->js_present); -} - -void kbase_gpuprops_set_features(struct kbase_device *kbdev) -{ - base_gpu_props *gpu_props; - struct kbase_gpuprops_regdump regdump; - - gpu_props = &kbdev->gpu_props.props; - - /* Dump relevant registers */ - kbase_backend_gpuprops_get_features(kbdev, ®dump); - - /* - * Copy the raw value from the register, later this will get turned - * into the selected coherency mode. - * Additionally, add non-coherent mode, as this is always supported. - */ - gpu_props->raw_props.coherency_mode = regdump.coherency_features | - COHERENCY_FEATURE_BIT(COHERENCY_NONE); - - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) - gpu_props->thread_props.max_thread_group_split = 0; -} - -static struct { - u32 type; - size_t offset; - int size; -} gpu_property_mapping[] = { -#define PROP(name, member) \ - {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ - sizeof(((struct base_gpu_props *)0)->member)} - PROP(PRODUCT_ID, core_props.product_id), - PROP(VERSION_STATUS, core_props.version_status), - PROP(MINOR_REVISION, core_props.minor_revision), - PROP(MAJOR_REVISION, core_props.major_revision), - PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), - PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), - PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), - PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), - PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), - PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), - PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), - PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), - - PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), - PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), - PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), - - PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), - PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), - - PROP(MAX_THREADS, thread_props.max_threads), - PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), - PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), - PROP(MAX_REGISTERS, thread_props.max_registers), - PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), - PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), - PROP(IMPL_TECH, thread_props.impl_tech), - PROP(TLS_ALLOC, thread_props.tls_alloc), - - PROP(RAW_SHADER_PRESENT, raw_props.shader_present), - PROP(RAW_TILER_PRESENT, raw_props.tiler_present), - PROP(RAW_L2_PRESENT, raw_props.l2_present), - PROP(RAW_STACK_PRESENT, raw_props.stack_present), - PROP(RAW_L2_FEATURES, raw_props.l2_features), - PROP(RAW_CORE_FEATURES, raw_props.core_features), - PROP(RAW_MEM_FEATURES, raw_props.mem_features), - PROP(RAW_MMU_FEATURES, raw_props.mmu_features), - PROP(RAW_AS_PRESENT, raw_props.as_present), - PROP(RAW_JS_PRESENT, 
raw_props.js_present), - PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), - PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), - PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), - PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), - PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), - PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), - PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), - PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), - PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), - PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), - PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), - PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), - PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), - PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), - PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), - PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), - PROP(RAW_TILER_FEATURES, raw_props.tiler_features), - PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), - PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), - PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), - PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), - PROP(RAW_GPU_ID, raw_props.gpu_id), - PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), - PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, - raw_props.thread_max_workgroup_size), - PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), - PROP(RAW_THREAD_FEATURES, raw_props.thread_features), - PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), - PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), - - PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), - PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), - PROP(COHERENCY_COHERENCY, coherency_info.coherency), - PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), - PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), - PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), - PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), - PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), - PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), - PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), - PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), - PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), - PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), - PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), - PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), - PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), - PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), - PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), - PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), - -#undef PROP -}; - -int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) -{ - struct kbase_gpu_props *kprops = &kbdev->gpu_props; - struct base_gpu_props *props = &kprops->props; - u32 count = ARRAY_SIZE(gpu_property_mapping); - u32 i; - u32 size = 0; - u8 *p; - - for (i = 0; i < count; i++) { - /* 4 bytes for the ID, and the size of the property */ - size += 4 + gpu_property_mapping[i].size; - } - - kprops->prop_buffer_size = size; - kprops->prop_buffer = kmalloc(size, GFP_KERNEL); - - if (!kprops->prop_buffer) { - kprops->prop_buffer_size = 0; - return -ENOMEM; - } - - p = kprops->prop_buffer; - -#define WRITE_U8(v) (*p++ = (v) & 0xFF) -#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) -#define WRITE_U32(v) do { 
WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) -#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) - - for (i = 0; i < count; i++) { - u32 type = gpu_property_mapping[i].type; - u8 type_size; - void *field = ((u8 *)props) + gpu_property_mapping[i].offset; - - switch (gpu_property_mapping[i].size) { - case 1: - type_size = KBASE_GPUPROP_VALUE_SIZE_U8; - break; - case 2: - type_size = KBASE_GPUPROP_VALUE_SIZE_U16; - break; - case 4: - type_size = KBASE_GPUPROP_VALUE_SIZE_U32; - break; - case 8: - type_size = KBASE_GPUPROP_VALUE_SIZE_U64; - break; - default: - dev_err(kbdev->dev, - "Invalid gpu_property_mapping type=%d size=%d", - type, gpu_property_mapping[i].size); - return -EINVAL; - } - - WRITE_U32((type<<2) | type_size); - - switch (type_size) { - case KBASE_GPUPROP_VALUE_SIZE_U8: - WRITE_U8(*((u8 *)field)); - break; - case KBASE_GPUPROP_VALUE_SIZE_U16: - WRITE_U16(*((u16 *)field)); - break; - case KBASE_GPUPROP_VALUE_SIZE_U32: - WRITE_U32(*((u32 *)field)); - break; - case KBASE_GPUPROP_VALUE_SIZE_U64: - WRITE_U64(*((u64 *)field)); - break; - default: /* Cannot be reached */ - WARN_ON(1); - return -EINVAL; - } - } - - return 0; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops.h deleted file mode 100755 index 37d9c08770bf..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_gpuprops.h - * Base kernel property query APIs - */ - -#ifndef _KBASE_GPUPROPS_H_ -#define _KBASE_GPUPROPS_H_ - -#include "mali_kbase_gpuprops_types.h" - -/* Forward definition - see mali_kbase.h */ -struct kbase_device; - -/** - * @brief Set up Kbase GPU properties. - * - * Set up Kbase GPU properties with information from the GPU registers - * - * @param kbdev The struct kbase_device structure for the device - */ -void kbase_gpuprops_set(struct kbase_device *kbdev); - -/** - * kbase_gpuprops_set_features - Set up Kbase GPU properties - * @kbdev: Device pointer - * - * This function sets up GPU properties that are dependent on the hardware - * features bitmask. This function must be preceeded by a call to - * kbase_hw_set_features_mask(). - */ -void kbase_gpuprops_set_features(struct kbase_device *kbdev); - -/** - * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer - * @kbdev: The kbase device - * - * Fills kbdev->gpu_props->prop_buffer with the GPU properties for user - * space to read. 
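Each entry emitted into the property buffer above is a 4-byte little-endian header of (type << 2) | type_size followed by the value itself in little-endian order, as the WRITE_U8/U16/U32/U64 macros show. A standalone sketch of that byte layout; the property id, the numeric size code and the value are made-up placeholders rather than real KBASE_GPUPROP constants:

#include <stdint.h>
#include <stdio.h>

static uint8_t buf[12];
static uint8_t *p = buf;

/* Little-endian emitters, restated from the WRITE_* macros above. */
static void write_u8(uint32_t v)  { *p++ = v & 0xFF; }
static void write_u16(uint32_t v) { write_u8(v); write_u8(v >> 8); }
static void write_u32(uint32_t v) { write_u16(v); write_u16(v >> 16); }

int main(void)
{
	uint32_t type = 1;	/* hypothetical property id */
	uint32_t type_size = 2;	/* assumed size code for a 4-byte value */
	unsigned int i;

	write_u32((type << 2) | type_size);	/* header */
	write_u32(0x1234u);			/* hypothetical property value */

	/* Prints: 06 00 00 00 34 12 00 00 */
	for (i = 0; i < (unsigned int)(p - buf); i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}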
- */ -int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); - -/** - * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value - * @gpu_props: the &base_gpu_props structure - * - * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into - * separate fields (version_status, minor_revision, major_revision, product_id) - * stored in base_gpu_props::core_props. - */ -void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props); - - -#endif /* _KBASE_GPUPROPS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops_types.h deleted file mode 100755 index d7877d1d4a57..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gpuprops_types.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_gpuprops_types.h - * Base kernel property query APIs - */ - -#ifndef _KBASE_GPUPROPS_TYPES_H_ -#define _KBASE_GPUPROPS_TYPES_H_ - -#include "mali_base_kernel.h" - -#define KBASE_GPU_SPEED_MHZ 123 -#define KBASE_GPU_PC_SIZE_LOG2 24U - -struct kbase_gpuprops_regdump { - u32 gpu_id; - u32 l2_features; - u32 core_features; - u32 tiler_features; - u32 mem_features; - u32 mmu_features; - u32 as_present; - u32 js_present; - u32 thread_max_threads; - u32 thread_max_workgroup_size; - u32 thread_max_barrier_size; - u32 thread_features; - u32 thread_tls_alloc; - u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; - u32 js_features[GPU_MAX_JOB_SLOTS]; - u32 shader_present_lo; - u32 shader_present_hi; - u32 tiler_present_lo; - u32 tiler_present_hi; - u32 l2_present_lo; - u32 l2_present_hi; - u32 stack_present_lo; - u32 stack_present_hi; - u32 coherency_features; -}; - -struct kbase_gpu_cache_props { - u8 associativity; - u8 external_bus_width; -}; - -struct kbase_gpu_mem_props { - u8 core_group; -}; - -struct kbase_gpu_mmu_props { - u8 va_bits; - u8 pa_bits; -}; - -struct kbase_gpu_props { - /* kernel-only properties */ - u8 num_cores; - u8 num_core_groups; - u8 num_address_spaces; - u8 num_job_slots; - - struct kbase_gpu_cache_props l2_props; - - struct kbase_gpu_mem_props mem; - struct kbase_gpu_mmu_props mmu; - - /* Properties shared with userspace */ - base_gpu_props props; - - u32 prop_buffer_size; - void *prop_buffer; -}; - -#endif /* _KBASE_GPUPROPS_TYPES_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gwt.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gwt.c deleted file mode 100755 index 0481f80fec75..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gwt.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_gwt.h" -#include - -static inline void kbase_gpu_gwt_setup_page_permission( - struct kbase_context *kctx, - unsigned long flag, - struct rb_node *node) -{ - struct rb_node *rbnode = node; - - while (rbnode) { - struct kbase_va_region *reg; - int err = 0; - - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - if (reg->nr_pages && !(reg->flags & KBASE_REG_FREE) && - (reg->flags & KBASE_REG_GPU_WR)) { - err = kbase_mmu_update_pages(kctx, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - reg->gpu_alloc->nents, - reg->flags & flag); - if (err) - dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n"); - } - - rbnode = rb_next(rbnode); - } -} - -static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, - unsigned long flag) -{ - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_same))); - kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_custom))); -} - - -int kbase_gpu_gwt_start(struct kbase_context *kctx) -{ - kbase_gpu_vm_lock(kctx); - if (kctx->gwt_enabled) { - kbase_gpu_vm_unlock(kctx); - return -EBUSY; - } - - INIT_LIST_HEAD(&kctx->gwt_current_list); - INIT_LIST_HEAD(&kctx->gwt_snapshot_list); - - /* If GWT is enabled using new vector dumping format - * from user space, back up status of the job serialization flag and - * use full serialisation of jobs for dumping. - * Status will be restored on end of dumping in gwt_stop. - */ - kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs; - kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | - KBASE_SERIALIZE_INTER_SLOT; - - /* Mark gwt enabled before making pages read only in case a - write page fault is triggered while we're still in this loop. - (kbase_gpu_vm_lock() doesn't prevent this!) 
- */ - kctx->gwt_enabled = true; - kctx->gwt_was_enabled = true; - - kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); - - kbase_gpu_vm_unlock(kctx); - return 0; -} - -int kbase_gpu_gwt_stop(struct kbase_context *kctx) -{ - struct kbasep_gwt_list_element *pos, *n; - - kbase_gpu_vm_lock(kctx); - if (!kctx->gwt_enabled) { - kbase_gpu_vm_unlock(kctx); - return -EINVAL; - } - - list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) { - list_del(&pos->link); - kfree(pos); - } - - list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) { - list_del(&pos->link); - kfree(pos); - } - - kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; - - kbase_gpu_gwt_setup_pages(kctx, ~0UL); - - kctx->gwt_enabled = false; - kbase_gpu_vm_unlock(kctx); - return 0; -} - - -static int list_cmp_function(void *priv, struct list_head *a, - struct list_head *b) -{ - struct kbasep_gwt_list_element *elementA = container_of(a, - struct kbasep_gwt_list_element, link); - struct kbasep_gwt_list_element *elementB = container_of(b, - struct kbasep_gwt_list_element, link); - - CSTD_UNUSED(priv); - - if (elementA->page_addr > elementB->page_addr) - return 1; - return -1; -} - -static void kbase_gpu_gwt_collate(struct kbase_context *kctx, - struct list_head *snapshot_list) -{ - struct kbasep_gwt_list_element *pos, *n; - struct kbasep_gwt_list_element *collated = NULL; - - /* Sort the list */ - list_sort(NULL, snapshot_list, list_cmp_function); - - /* Combine contiguous areas. */ - list_for_each_entry_safe(pos, n, snapshot_list, link) { - if (collated == NULL || collated->region != - pos->region || - (collated->page_addr + - (collated->num_pages * PAGE_SIZE)) != - pos->page_addr) { - /* This is the first time through, a new region or - * is not contiguous - start collating to this element - */ - collated = pos; - } else { - /* contiguous so merge */ - collated->num_pages += pos->num_pages; - /* remove element from list */ - list_del(&pos->link); - kfree(pos); - } - } -} - -int kbase_gpu_gwt_dump(struct kbase_context *kctx, - union kbase_ioctl_cinstr_gwt_dump *gwt_dump) -{ - const u32 ubuf_size = gwt_dump->in.len; - u32 ubuf_count = 0; - __user void *user_addr = (__user void *) - (uintptr_t)gwt_dump->in.addr_buffer; - __user void *user_sizes = (__user void *) - (uintptr_t)gwt_dump->in.size_buffer; - - kbase_gpu_vm_lock(kctx); - - if (!kctx->gwt_enabled) { - kbase_gpu_vm_unlock(kctx); - /* gwt_dump shouldn't be called when gwt is disabled */ - return -EPERM; - } - - if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer - || !gwt_dump->in.size_buffer) { - kbase_gpu_vm_unlock(kctx); - /* We don't have any valid user space buffer to copy the - * write modified addresses. - */ - return -EINVAL; - } - - if (list_empty(&kctx->gwt_snapshot_list) && - !list_empty(&kctx->gwt_current_list)) { - - list_replace_init(&kctx->gwt_current_list, - &kctx->gwt_snapshot_list); - - /* We have collected all write faults so far - * and they will be passed on to user space. - * Reset the page flags state to allow collection of - * further write faults. 
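The collation step above folds the sorted write-fault records into contiguous runs before they are copied to user space. A standalone sketch of the same merge rule applied to a plain array, with an assumed 4 KiB page size and made-up addresses; the driver additionally requires entries to belong to the same region, which is omitted here:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct range {
	uint64_t page_addr;
	uint64_t num_pages;
};

int main(void)
{
	/* Already sorted by address, as list_sort() guarantees above. */
	struct range in[] = {
		{ 0x1000, 1 }, { 0x2000, 2 }, { 0x8000, 1 }, { 0x9000, 1 },
	};
	struct range out[4];
	unsigned int n_in = sizeof(in) / sizeof(in[0]);
	unsigned int n_out = 0;
	unsigned int i;

	for (i = 0; i < n_in; i++) {
		if (n_out &&
		    out[n_out - 1].page_addr +
		    out[n_out - 1].num_pages * PAGE_SIZE == in[i].page_addr)
			out[n_out - 1].num_pages += in[i].num_pages; /* merge */
		else
			out[n_out++] = in[i];			     /* new run */
	}

	/* Prints two runs: 0x1000 (3 pages) and 0x8000 (2 pages). */
	for (i = 0; i < n_out; i++)
		printf("0x%llx: %llu page(s)\n",
		       (unsigned long long)out[i].page_addr,
		       (unsigned long long)out[i].num_pages);
	return 0;
}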
- */ - kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); - - /* Sort and combine consecutive pages in the dump list*/ - kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list); - } - - while ((!list_empty(&kctx->gwt_snapshot_list))) { - u64 addr_buffer[32]; - u64 num_page_buffer[32]; - u32 count = 0; - int err; - struct kbasep_gwt_list_element *dump_info, *n; - - list_for_each_entry_safe(dump_info, n, - &kctx->gwt_snapshot_list, link) { - addr_buffer[count] = dump_info->page_addr; - num_page_buffer[count] = dump_info->num_pages; - count++; - list_del(&dump_info->link); - kfree(dump_info); - if (ARRAY_SIZE(addr_buffer) == count || - ubuf_size == (ubuf_count + count)) - break; - } - - if (count) { - err = copy_to_user((user_addr + - (ubuf_count * sizeof(u64))), - (void *)addr_buffer, - count * sizeof(u64)); - if (err) { - dev_err(kctx->kbdev->dev, "Copy to user failure\n"); - kbase_gpu_vm_unlock(kctx); - return err; - } - err = copy_to_user((user_sizes + - (ubuf_count * sizeof(u64))), - (void *)num_page_buffer, - count * sizeof(u64)); - if (err) { - dev_err(kctx->kbdev->dev, "Copy to user failure\n"); - kbase_gpu_vm_unlock(kctx); - return err; - } - - ubuf_count += count; - } - - if (ubuf_count == ubuf_size) - break; - } - - if (!list_empty(&kctx->gwt_snapshot_list)) - gwt_dump->out.more_data_available = 1; - else - gwt_dump->out.more_data_available = 0; - - gwt_dump->out.no_of_addr_collected = ubuf_count; - kbase_gpu_vm_unlock(kctx); - return 0; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gwt.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_gwt.h deleted file mode 100755 index 7e7746e64915..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_gwt.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#if !defined(_KBASE_GWT_H) -#define _KBASE_GWT_H - -#include -#include - -/** - * kbase_gpu_gwt_start - Start the GPU write tracking - * @kctx: Pointer to kernel context - * - * @return 0 on success, error on failure. - */ -int kbase_gpu_gwt_start(struct kbase_context *kctx); - -/** - * kbase_gpu_gwt_stop - Stop the GPU write tracking - * @kctx: Pointer to kernel context - * - * @return 0 on success, error on failure. - */ -int kbase_gpu_gwt_stop(struct kbase_context *kctx); - -/** - * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space. - * @kctx: Pointer to kernel context - * @gwt_dump: User space data to be passed. - * - * @return 0 on success, error on failure. 
- */ -int kbase_gpu_gwt_dump(struct kbase_context *kctx, - union kbase_ioctl_cinstr_gwt_dump *gwt_dump); - -#endif /* _KBASE_GWT_H */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hw.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hw.c deleted file mode 100755 index 450926c3d35f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hw.c +++ /dev/null @@ -1,564 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Run-time work-arounds helpers - */ - -#include -#include -#include -#include "mali_kbase.h" -#include "mali_kbase_hw.h" - -void kbase_hw_set_features_mask(struct kbase_device *kbdev) -{ - const enum base_hw_feature *features; - u32 gpu_id; - u32 product_id; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; - product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: - features = base_hw_features_tMIx; - break; - case GPU_ID2_PRODUCT_THEX: - features = base_hw_features_tHEx; - break; - case GPU_ID2_PRODUCT_TSIX: - features = base_hw_features_tSIx; - break; - case GPU_ID2_PRODUCT_TDVX: - features = base_hw_features_tDVx; - break; - case GPU_ID2_PRODUCT_TNOX: - features = base_hw_features_tNOx; - break; - case GPU_ID2_PRODUCT_TGOX: - features = base_hw_features_tGOx; - break; - case GPU_ID2_PRODUCT_TKAX: - features = base_hw_features_tKAx; - break; - case GPU_ID2_PRODUCT_TEGX: - features = base_hw_features_tEGx; - break; - case GPU_ID2_PRODUCT_TTRX: - features = base_hw_features_tTRx; - break; - case GPU_ID2_PRODUCT_TNAX: - features = base_hw_features_tNAx; - break; - case GPU_ID2_PRODUCT_TBEX: - features = base_hw_features_tBEx; - break; - case GPU_ID2_PRODUCT_TULX: - features = base_hw_features_tULx; - break; - case GPU_ID2_PRODUCT_TBOX: - features = base_hw_features_tBOx; - break; - case GPU_ID2_PRODUCT_TIDX: - features = base_hw_features_tIDx; - break; - case GPU_ID2_PRODUCT_TVAX: - features = base_hw_features_tVAx; - break; - default: - features = base_hw_features_generic; - break; - } - } else { - switch (product_id) { - case GPU_ID_PI_TFRX: - /* FALLTHROUGH */ - case GPU_ID_PI_T86X: - features = base_hw_features_tFxx; - break; - case GPU_ID_PI_T83X: - features = base_hw_features_t83x; - break; - case GPU_ID_PI_T82X: - features = base_hw_features_t82x; - break; - case GPU_ID_PI_T76X: - features = base_hw_features_t76x; - break; - case GPU_ID_PI_T72X: - features = base_hw_features_t72x; - break; - case GPU_ID_PI_T62X: - features = base_hw_features_t62x; - break; - case GPU_ID_PI_T60X: - features = base_hw_features_t60x; - break; - default: - features = base_hw_features_generic; - break; - } - } - - for (; *features != 
BASE_HW_FEATURE_END; features++) - set_bit(*features, &kbdev->hw_features_mask[0]); -} - -/** - * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID - * @kbdev: Device pointer - * - * Return: pointer to an array of hardware issues, terminated by - * BASE_HW_ISSUE_END. - * - * This function can only be used on new-format GPU IDs, i.e. those for which - * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev. - * - * In debugging versions of the driver, unknown versions of a known GPU will - * be treated as the most recent known version not later than the actual - * version. In such circumstances, the GPU ID in @kbdev will also be replaced - * with the most recent known version. - * - * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() - * before calling this function. - */ -static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( - struct kbase_device *kbdev) -{ - const enum base_hw_issue *issues = NULL; - - struct base_hw_product { - u32 product_model; - struct { - u32 version; - const enum base_hw_issue *issues; - } map[7]; - }; - - static const struct base_hw_product base_hw_products[] = { - {GPU_ID2_PRODUCT_TMIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 1), - base_hw_issues_tMIx_r0p0_05dev0}, - {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, - {U32_MAX /* sentinel value */, NULL} } }, - - {GPU_ID2_PRODUCT_THEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, - {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TSIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDVX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TGOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TKAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TEGX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tEGx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TTRX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TULX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TIDX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tIDx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TVAX, - 
{{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, - {U32_MAX, NULL} } }, - }; - - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; - const struct base_hw_product *product = NULL; - size_t p; - - /* Stop when we reach the end of the products array. */ - for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { - if (product_model == base_hw_products[p].product_model) { - product = &base_hw_products[p]; - break; - } - } - - if (product != NULL) { - /* Found a matching product. */ - const u32 version = gpu_id & GPU_ID2_VERSION; - u32 fallback_version = 0; - const enum base_hw_issue *fallback_issues = NULL; - size_t v; - - /* Stop when we reach the end of the map. */ - for (v = 0; product->map[v].version != U32_MAX; ++v) { - - if (version == product->map[v].version) { - /* Exact match so stop. */ - issues = product->map[v].issues; - break; - } - - /* Check whether this is a candidate for most recent - known version not later than the actual - version. */ - if ((version > product->map[v].version) && - (product->map[v].version >= fallback_version)) { -#if MALI_CUSTOMER_RELEASE - /* Match on version's major and minor fields */ - if (((version ^ product->map[v].version) >> - GPU_ID2_VERSION_MINOR_SHIFT) == 0) -#endif - { - fallback_version = product->map[v].version; - fallback_issues = product->map[v].issues; - } - } - } - - if ((issues == NULL) && (fallback_issues != NULL)) { - /* Fall back to the issue set of the most recent known - version not later than the actual version. */ - issues = fallback_issues; - -#if MALI_CUSTOMER_RELEASE - dev_warn(kbdev->dev, - "GPU hardware issue table may need updating:\n" -#else - dev_info(kbdev->dev, -#endif - "r%dp%d status %d is unknown; treating as r%dp%d status %d", - (gpu_id & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT, - (fallback_version & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (fallback_version & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (fallback_version & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT); - - gpu_id &= ~GPU_ID2_VERSION; - gpu_id |= fallback_version; - kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; - - kbase_gpuprops_update_core_props_gpu_id( - &kbdev->gpu_props.props); - } - } - return issues; -} - -int kbase_hw_set_issues_mask(struct kbase_device *kbdev) -{ - const enum base_hw_issue *issues; - u32 gpu_id; - u32 product_id; - u32 impl_tech; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; - product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; - impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; - - if (impl_tech != IMPLEMENTATION_MODEL) { - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - issues = kbase_hw_get_issues_for_new_id(kbdev); - if (issues == NULL) { - dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); - return -EINVAL; - } - -#if !MALI_CUSTOMER_RELEASE - /* The GPU ID might have been replaced with the last - known version of the same GPU. 
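The version-map scan above implements the rule spelled out in the kernel-doc: take an exact match if the table has one, otherwise remember the most recent known version that is not later than the actual version (optionally restricted to the same major/minor in customer builds) and fall back to its issue list. A standalone sketch of that selection rule follows; the table types and sentinel convention are made up for illustration and are not the kbase GPU_ID2 macros.

#include <stddef.h>
#include <stdint.h>

struct version_map_entry {
        uint32_t version;       /* packed version, not necessarily sorted */
        const int *issues;      /* NULL marks the sentinel entry */
};

/* Return the issue list for @version: an exact match if present, otherwise
 * the entry with the highest version that is still <= @version, or NULL if
 * no such entry exists.
 */
static const int *lookup_issues(const struct version_map_entry *map,
                                uint32_t version)
{
        uint32_t fallback_version = 0;
        const int *fallback_issues = NULL;
        size_t i;

        for (i = 0; map[i].issues != NULL; i++) {
                if (map[i].version == version)
                        return map[i].issues;   /* exact match wins */

                if (map[i].version < version &&
                    map[i].version >= fallback_version) {
                        fallback_version = map[i].version;
                        fallback_issues = map[i].issues;
                }
        }

        return fallback_issues;
}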
*/ - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; -#endif - - } else { - switch (gpu_id) { - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): - issues = base_hw_issues_t60x_r0p0_15dev0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): - issues = base_hw_issues_t60x_r0p0_eac; - break; - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): - issues = base_hw_issues_t60x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): - issues = base_hw_issues_t62x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): - issues = base_hw_issues_t62x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): - issues = base_hw_issues_t62x_r1p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): - issues = base_hw_issues_t76x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): - issues = base_hw_issues_t76x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): - issues = base_hw_issues_t76x_r0p1_50rel0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): - issues = base_hw_issues_t76x_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): - issues = base_hw_issues_t76x_r0p3; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): - issues = base_hw_issues_t76x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): - issues = base_hw_issues_t72x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): - issues = base_hw_issues_t72x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): - issues = base_hw_issues_t72x_r1p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): - issues = base_hw_issues_tFRx_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): - issues = base_hw_issues_tFRx_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): - issues = base_hw_issues_tFRx_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): - issues = base_hw_issues_tFRx_r2p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): - issues = base_hw_issues_t86x_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): - issues = base_hw_issues_t86x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): - issues = base_hw_issues_t86x_r2p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): - issues = base_hw_issues_t83x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): - issues = base_hw_issues_t83x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): - issues = base_hw_issues_t82x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): - issues = base_hw_issues_t82x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): - issues = base_hw_issues_t82x_r1p0; - break; - default: - dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); - return -EINVAL; - } - } - } else { - /* Software model */ - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: - issues = base_hw_issues_model_tMIx; - break; - case GPU_ID2_PRODUCT_THEX: - issues = base_hw_issues_model_tHEx; - break; - case GPU_ID2_PRODUCT_TSIX: - issues = base_hw_issues_model_tSIx; - break; - case GPU_ID2_PRODUCT_TDVX: - issues = base_hw_issues_model_tDVx; - break; - case GPU_ID2_PRODUCT_TNOX: - issues = base_hw_issues_model_tNOx; - break; - case GPU_ID2_PRODUCT_TGOX: - issues = 
base_hw_issues_model_tGOx; - break; - case GPU_ID2_PRODUCT_TKAX: - issues = base_hw_issues_model_tKAx; - break; - case GPU_ID2_PRODUCT_TEGX: - issues = base_hw_issues_model_tEGx; - break; - case GPU_ID2_PRODUCT_TTRX: - issues = base_hw_issues_model_tTRx; - break; - case GPU_ID2_PRODUCT_TNAX: - issues = base_hw_issues_model_tNAx; - break; - case GPU_ID2_PRODUCT_TBEX: - issues = base_hw_issues_model_tBEx; - break; - case GPU_ID2_PRODUCT_TULX: - issues = base_hw_issues_model_tULx; - break; - case GPU_ID2_PRODUCT_TBOX: - issues = base_hw_issues_model_tBOx; - break; - case GPU_ID2_PRODUCT_TIDX: - issues = base_hw_issues_model_tIDx; - break; - case GPU_ID2_PRODUCT_TVAX: - issues = base_hw_issues_model_tVAx; - break; - default: - dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); - return -EINVAL; - } - } else { - switch (product_id) { - case GPU_ID_PI_T60X: - issues = base_hw_issues_model_t60x; - break; - case GPU_ID_PI_T62X: - issues = base_hw_issues_model_t62x; - break; - case GPU_ID_PI_T72X: - issues = base_hw_issues_model_t72x; - break; - case GPU_ID_PI_T76X: - issues = base_hw_issues_model_t76x; - break; - case GPU_ID_PI_TFRX: - issues = base_hw_issues_model_tFRx; - break; - case GPU_ID_PI_T86X: - issues = base_hw_issues_model_t86x; - break; - case GPU_ID_PI_T83X: - issues = base_hw_issues_model_t83x; - break; - case GPU_ID_PI_T82X: - issues = base_hw_issues_model_t82x; - break; - default: - dev_err(kbdev->dev, "Unknown GPU ID %x", - gpu_id); - return -EINVAL; - } - } - } - - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - dev_info(kbdev->dev, - "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", - (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> - GPU_ID2_PRODUCT_MAJOR_SHIFT, - (gpu_id & GPU_ID2_ARCH_MAJOR) >> - GPU_ID2_ARCH_MAJOR_SHIFT, - (gpu_id & GPU_ID2_ARCH_MINOR) >> - GPU_ID2_ARCH_MINOR_SHIFT, - (gpu_id & GPU_ID2_ARCH_REV) >> - GPU_ID2_ARCH_REV_SHIFT, - (gpu_id & GPU_ID2_VERSION_MAJOR) >> - GPU_ID2_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_MINOR) >> - GPU_ID2_VERSION_MINOR_SHIFT, - (gpu_id & GPU_ID2_VERSION_STATUS) >> - GPU_ID2_VERSION_STATUS_SHIFT); - } else { - dev_info(kbdev->dev, - "GPU identified as 0x%04x r%dp%d status %d", - (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> - GPU_ID_VERSION_PRODUCT_ID_SHIFT, - (gpu_id & GPU_ID_VERSION_MAJOR) >> - GPU_ID_VERSION_MAJOR_SHIFT, - (gpu_id & GPU_ID_VERSION_MINOR) >> - GPU_ID_VERSION_MINOR_SHIFT, - (gpu_id & GPU_ID_VERSION_STATUS) >> - GPU_ID_VERSION_STATUS_SHIFT); - } - - for (; *issues != BASE_HW_ISSUE_END; issues++) - set_bit(*issues, &kbdev->hw_issues_mask[0]); - - return 0; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hw.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hw.h deleted file mode 100755 index f386b1624317..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hw.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file - * Run-time work-arounds helpers - */ - -#ifndef _KBASE_HW_H_ -#define _KBASE_HW_H_ - -#include "mali_kbase_defs.h" - -/** - * @brief Tell whether a work-around should be enabled - */ -#define kbase_hw_has_issue(kbdev, issue)\ - test_bit(issue, &(kbdev)->hw_issues_mask[0]) - -/** - * @brief Tell whether a feature is supported - */ -#define kbase_hw_has_feature(kbdev, feature)\ - test_bit(feature, &(kbdev)->hw_features_mask[0]) - -/** - * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID - * @kbdev: Device pointer - * - * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. - * - * The GPU ID is read from the @kbdev. - * - * In debugging versions of the driver, unknown versions of a known GPU with a - * new-format ID will be treated as the most recent known version not later - * than the actual version. In such circumstances, the GPU ID in @kbdev will - * also be replaced with the most recent known version. - * - * Note: The GPU configuration must have been read by - * kbase_gpuprops_get_props() before calling this function. - */ -int kbase_hw_set_issues_mask(struct kbase_device *kbdev); - -/** - * @brief Set the features mask depending on the GPU ID - */ -void kbase_hw_set_features_mask(struct kbase_device *kbdev); - -#endif /* _KBASE_HW_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_backend.h deleted file mode 100755 index dde4965c426a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_backend.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * HW access backend common APIs - */ - -#ifndef _KBASE_HWACCESS_BACKEND_H_ -#define _KBASE_HWACCESS_BACKEND_H_ - -/** - * kbase_backend_early_init - Perform any backend-specific initialization. - * @kbdev: Device pointer - * - * Return: 0 on success, or an error code on failure. - */ -int kbase_backend_early_init(struct kbase_device *kbdev); - -/** - * kbase_backend_late_init - Perform any backend-specific initialization. - * @kbdev: Device pointer - * - * Return: 0 on success, or an error code on failure. - */ -int kbase_backend_late_init(struct kbase_device *kbdev); - -/** - * kbase_backend_early_term - Perform any backend-specific termination. - * @kbdev: Device pointer - */ -void kbase_backend_early_term(struct kbase_device *kbdev); - -/** - * kbase_backend_late_term - Perform any backend-specific termination. 
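The backend bring-up API above splits into early and late init/term pairs. One plausible calling pattern, sketched below, is the usual reverse-order unwind on failure; the enclosing function and its name are hypothetical, only the four kbase_backend_*() prototypes are taken from this header.

/* Illustrative ordering only; the enclosing function is hypothetical. */
static int example_backend_bringup(struct kbase_device *kbdev)
{
        int err;

        err = kbase_backend_early_init(kbdev);
        if (err)
                return err;

        err = kbase_backend_late_init(kbdev);
        if (err)
                goto fail_late_init;

        return 0;

fail_late_init:
        kbase_backend_early_term(kbdev);
        return err;
}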
- * @kbdev: Device pointer - */ -void kbase_backend_late_term(struct kbase_device *kbdev); - -#endif /* _KBASE_HWACCESS_BACKEND_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_defs.h deleted file mode 100755 index 124a2d9cf0c3..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_defs.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/** - * @file mali_kbase_hwaccess_gpu_defs.h - * HW access common definitions - */ - -#ifndef _KBASE_HWACCESS_DEFS_H_ -#define _KBASE_HWACCESS_DEFS_H_ - -#include - -/** - * struct kbase_hwaccess_data - object encapsulating the GPU backend specific - * data for the HW access layer. - * hwaccess_lock (a spinlock) must be held when - * accessing this structure. - * @active_kctx: pointer to active kbase context which last submitted an - * atom to GPU and while the context is active it can - * submit new atoms to GPU from the irq context also, without - * going through the bottom half of job completion path. - * @backend: GPU backend specific data for HW access layer - */ -struct kbase_hwaccess_data { - struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; - - struct kbase_backend_data backend; -}; - -#endif /* _KBASE_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_gpuprops.h deleted file mode 100755 index 63844d97ce02..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_gpuprops.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/** - * Base kernel property query backend APIs - */ - -#ifndef _KBASE_HWACCESS_GPUPROPS_H_ -#define _KBASE_HWACCESS_GPUPROPS_H_ - -/** - * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from - * GPU - * @kbdev: Device pointer - * @regdump: Pointer to struct kbase_gpuprops_regdump structure - * - * The caller should ensure that GPU remains powered-on during this function. - */ -void kbase_backend_gpuprops_get(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump); - -/** - * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU - * @kbdev: Device pointer - * @regdump: Pointer to struct kbase_gpuprops_regdump structure - * - * This function reads GPU properties that are dependent on the hardware - * features bitmask. It will power-on the GPU if required. - */ -void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, - struct kbase_gpuprops_regdump *regdump); - - -#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_instr.h deleted file mode 100755 index d5b90994790b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_instr.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * HW Access instrumentation common APIs - */ - -#ifndef _KBASE_HWACCESS_INSTR_H_ -#define _KBASE_HWACCESS_INSTR_H_ - -#include - -/** - * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. - * @dump_buffer: GPU address to write counters to. - * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. - * @jm_bm: counters selection bitmask (JM). - * @shader_bm: counters selection bitmask (Shader). - * @tiler_bm: counters selection bitmask (Tiler). - * @mmu_l2_bm: counters selection bitmask (MMU_L2). - * @use_secondary: use secondary performance counters set for applicable - * counter blocks. 
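A hedged sketch of how the enable structure described above might be filled in and driven through the enable / dump / wait / disable entry points declared further down in this header. The GPU dump address, buffer size and all-ones counter bitmasks are placeholder values, and the surrounding function is invented for illustration.

/* Placeholder values; gpu_dump_va, dump_bytes and the bitmasks are
 * hypothetical, chosen only to show the call sequence.
 */
static int example_hwcnt_session(struct kbase_device *kbdev,
                                 struct kbase_context *kctx,
                                 u64 gpu_dump_va, u64 dump_bytes)
{
        struct kbase_instr_hwcnt_enable enable = {
                .dump_buffer = gpu_dump_va,
                .dump_buffer_bytes = dump_bytes,
                .jm_bm = 0xffffffff,
                .shader_bm = 0xffffffff,
                .tiler_bm = 0xffffffff,
                .mmu_l2_bm = 0xffffffff,
                .use_secondary = false,
        };
        int err;

        err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
        if (err)
                return err;

        err = kbase_instr_hwcnt_request_dump(kctx);
        if (!err)
                err = kbase_instr_hwcnt_wait_for_dump(kctx);

        kbase_instr_hwcnt_disable_internal(kctx);
        return err;
}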
- */ -struct kbase_instr_hwcnt_enable { - u64 dump_buffer; - u64 dump_buffer_bytes; - u32 jm_bm; - u32 shader_bm; - u32 tiler_bm; - u32 mmu_l2_bm; - bool use_secondary; -}; - -/** - * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection - * @kbdev: Kbase device - * @kctx: Kbase context - * @enable: HW counter setup parameters - * - * Context: might sleep, waiting for reset to complete - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_instr_hwcnt_enable *enable); - -/** - * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection - * @kctx: Kbase context - * - * Context: might sleep, waiting for an ongoing dump to complete - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx); - -/** - * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU - * @kctx: Kbase context - * - * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true, - * of call kbase_instr_hwcnt_wait_for_dump(). - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx); - -/** - * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has - * completed. - * @kctx: Kbase context - * - * Context: will sleep, waiting for dump to complete - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); - -/** - * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has - * completed - * @kctx: Kbase context - * @success: Set to true if successful - * - * Context: does not sleep. - * - * Return: true if the dump is complete - */ -bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, - bool * const success); - -/** - * kbase_instr_hwcnt_clear() - Clear HW counters - * @kctx: Kbase context - * - * Context: might sleep, waiting for reset to complete - * - * Return: 0 on success - */ -int kbase_instr_hwcnt_clear(struct kbase_context *kctx); - -/** - * kbase_instr_backend_init() - Initialise the instrumentation backend - * @kbdev: Kbase device - * - * This function should be called during driver initialization. - * - * Return: 0 on success - */ -int kbase_instr_backend_init(struct kbase_device *kbdev); - -/** - * kbase_instr_backend_init() - Terminate the instrumentation backend - * @kbdev: Kbase device - * - * This function should be called during driver termination. - */ -void kbase_instr_backend_term(struct kbase_device *kbdev); - -#endif /* _KBASE_HWACCESS_INSTR_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_jm.h deleted file mode 100755 index e2798eb09aea..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_jm.h +++ /dev/null @@ -1,377 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * HW access job manager common APIs - */ - -#ifndef _KBASE_HWACCESS_JM_H_ -#define _KBASE_HWACCESS_JM_H_ - -/** - * kbase_backend_run_atom() - Run an atom on the GPU - * @kbdev: Device pointer - * @atom: Atom to run - * - * Caller must hold the HW access lock - */ -void kbase_backend_run_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * kbase_backend_slot_update - Update state based on slot ringbuffers - * - * @kbdev: Device pointer - * - * Inspect the jobs in the slot ringbuffers and update state. - * - * This will cause jobs to be submitted to hardware if they are unblocked - */ -void kbase_backend_slot_update(struct kbase_device *kbdev); - -/** - * kbase_backend_find_and_release_free_address_space() - Release a free AS - * @kbdev: Device pointer - * @kctx: Context pointer - * - * This function can evict an idle context from the runpool, freeing up the - * address space it was using. - * - * The address space is marked as in use. The caller must either assign a - * context using kbase_gpu_use_ctx(), or release it using - * kbase_ctx_sched_release() - * - * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none - * available - */ -int kbase_backend_find_and_release_free_address_space( - struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the - * provided address space. - * @kbdev: Device pointer - * @kctx: Context pointer. May be NULL - * @as_nr: Free address space to use - * - * kbase_gpu_next_job() will pull atoms from the active context. - * - * Return: true if successful, false if ASID not assigned. - */ -bool kbase_backend_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int as_nr); - -/** - * kbase_backend_use_ctx_sched() - Activate a context. - * @kbdev: Device pointer - * @kctx: Context pointer - * @js: Job slot to activate context on - * - * kbase_gpu_next_job() will pull atoms from the active context. - * - * The context must already be scheduled and assigned to an address space. If - * the context is not scheduled, then kbase_gpu_use_ctx() should be used - * instead. - * - * Caller must hold hwaccess_lock - * - * Return: true if context is now active, false otherwise (ie if context does - * not have an address space assigned) - */ -bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, - struct kbase_context *kctx, int js); - -/** - * kbase_backend_release_ctx_irq - Release a context from the GPU. This will - * de-assign the assigned address space. - * @kbdev: Device pointer - * @kctx: Context pointer - * - * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock - */ -void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, - struct kbase_context *kctx); - -/** - * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will - * de-assign the assigned address space. - * @kbdev: Device pointer - * @kctx: Context pointer - * - * Caller must hold kbase_device->mmu_hw_mutex - * - * This function must perform any operations that could not be performed in IRQ - * context by kbase_backend_release_ctx_irq(). 
- */ -void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, - struct kbase_context *kctx); - -/** - * kbase_backend_cache_clean - Perform a cache clean if the given atom requires - * one - * @kbdev: Device pointer - * @katom: Pointer to the failed atom - * - * On some GPUs, the GPU cache must be cleaned following a failed atom. This - * function performs a clean if it is required by @katom. - */ -void kbase_backend_cache_clean(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - - -/** - * kbase_backend_complete_wq() - Perform backend-specific actions required on - * completing an atom. - * @kbdev: Device pointer - * @katom: Pointer to the atom to complete - * - * This function should only be called from kbase_jd_done_worker() or - * js_return_worker(). - * - * Return: true if atom has completed, false if atom should be re-submitted - */ -void kbase_backend_complete_wq(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * kbase_backend_complete_wq_post_sched - Perform backend-specific actions - * required on completing an atom, after - * any scheduling has taken place. - * @kbdev: Device pointer - * @core_req: Core requirements of atom - * - * This function should only be called from kbase_jd_done_worker() or - * js_return_worker(). - */ -void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, - base_jd_core_req core_req); - -/** - * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU - * and remove any others from the ringbuffers. - * @kbdev: Device pointer - * @end_timestamp: Timestamp of reset - */ -void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); - -/** - * kbase_backend_inspect_tail - Return the atom currently at the tail of slot - * @js - * @kbdev: Device pointer - * @js: Job slot to inspect - * - * Return : Atom currently at the head of slot @js, or NULL - */ -struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, - int js); - -/** - * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a - * slot. - * @kbdev: Device pointer - * @js: Job slot to inspect - * - * Return : Number of atoms currently on slot - */ -int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); - -/** - * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot - * that are currently on the GPU. - * @kbdev: Device pointer - * @js: Job slot to inspect - * - * Return : Number of atoms currently on slot @js that are currently on the GPU. - */ -int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); - -/** - * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs - * has changed. - * @kbdev: Device pointer - * - * Perform any required backend-specific actions (eg starting/stopping - * scheduling timers). - */ -void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); - -/** - * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. - * @kbdev: Device pointer - * - * Perform any required backend-specific actions (eg updating timeouts of - * currently running atoms). - */ -void kbase_backend_timeouts_changed(struct kbase_device *kbdev); - -/** - * kbase_backend_slot_free() - Return the number of jobs that can be currently - * submitted to slot @js. - * @kbdev: Device pointer - * @js: Job slot to inspect - * - * Return : Number of jobs that can be submitted. 
- */ -int kbase_backend_slot_free(struct kbase_device *kbdev, int js); - -/** - * kbase_job_check_enter_disjoint - potentially leave disjoint state - * @kbdev: kbase device - * @target_katom: atom which is finishing - * - * Work out whether to leave disjoint state when finishing an atom that was - * originated by kbase_job_check_enter_disjoint(). - */ -void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, - struct kbase_jd_atom *target_katom); - -/** - * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently - * running from a context - * @kctx: Context pointer - * - * This is used in response to a page fault to remove all jobs from the faulting - * context from the hardware. - */ -void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx); - -/** - * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and - * to be descheduled. - * @kctx: Context pointer - * - * This should be called following kbase_js_zap_context(), to ensure the context - * can be safely destroyed. - */ -void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); - -/** - * kbase_backend_get_current_flush_id - Return the current flush ID - * - * @kbdev: Device pointer - * - * Return: the current flush ID to be recorded for each job chain - */ -u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); - -/** - * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. - * @kbdev: Device pointer - * - * This function just soft-stops all the slots to ensure that as many jobs as - * possible are saved. - * - * Return: a boolean which should be interpreted as follows: - * - true - Prepared for reset, kbase_reset_gpu should be called. - * - false - Another thread is performing a reset, kbase_reset_gpu should - * not be called. - */ -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu - Reset the GPU - * @kbdev: Device pointer - * - * This function should be called after kbase_prepare_to_reset_gpu if it returns - * true. It should never be called without a corresponding call to - * kbase_prepare_to_reset_gpu. - * - * After this function is called (or not called if kbase_prepare_to_reset_gpu - * returned false), the caller should wait for kbdev->reset_waitq to be - * signalled to know when the reset has completed. - */ -void kbase_reset_gpu(struct kbase_device *kbdev); - -/** - * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. - * @kbdev: Device pointer - * - * This function just soft-stops all the slots to ensure that as many jobs as - * possible are saved. - * - * Return: a boolean which should be interpreted as follows: - * - true - Prepared for reset, kbase_reset_gpu should be called. - * - false - Another thread is performing a reset, kbase_reset_gpu should - * not be called. - */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu_locked - Reset the GPU - * @kbdev: Device pointer - * - * This function should be called after kbase_prepare_to_reset_gpu if it - * returns true. It should never be called without a corresponding call to - * kbase_prepare_to_reset_gpu. - * - * After this function is called (or not called if kbase_prepare_to_reset_gpu - * returned false), the caller should wait for kbdev->reset_waitq to be - * signalled to know when the reset has completed. 
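Taken together, the reset entry points above define a simple protocol: kbase_prepare_to_reset_gpu() returns true only for the thread that should actually perform the reset, and every caller then waits on kbdev->reset_waitq until the reset has completed. A sketch of a caller following that protocol is below; the wrapper function is hypothetical, and the wait_event() form assumes reset_waitq is an ordinary wait queue as the kernel-doc implies, polled via kbase_reset_gpu_active() declared just after this point.

/* Hypothetical caller; assumes kbdev->reset_waitq is a wait_queue_head_t. */
static void example_reset_gpu_and_wait(struct kbase_device *kbdev)
{
        if (kbase_prepare_to_reset_gpu(kbdev)) {
                /* This thread won the race and owns the reset. */
                kbase_reset_gpu(kbdev);
        }

        /* Either way, wait until the reset (ours or another thread's)
         * has completed.
         */
        wait_event(kbdev->reset_waitq, !kbase_reset_gpu_active(kbdev));
}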
- */ -void kbase_reset_gpu_locked(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu_silent - Reset the GPU silently - * @kbdev: Device pointer - * - * Reset the GPU without trying to cancel jobs and don't emit messages into - * the kernel log while doing the reset. - * - * This function should be used in cases where we are doing a controlled reset - * of the GPU as part of normal processing (e.g. exiting protected mode) where - * the driver will have ensured the scheduler has been idled and all other - * users of the GPU (e.g. instrumentation) have been suspended. - * - * Return: 0 if the reset was started successfully - * -EAGAIN if another reset is currently in progress - */ -int kbase_reset_gpu_silent(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu_active - Reports if the GPU is being reset - * @kbdev: Device pointer - * - * Return: True if the GPU is in the process of being reset. - */ -bool kbase_reset_gpu_active(struct kbase_device *kbdev); - -/** - * kbase_job_slot_hardstop - Hard-stop the specified job slot - * @kctx: The kbase context that contains the job(s) that should - * be hard-stopped - * @js: The job slot to hard-stop - * @target_katom: The job that should be hard-stopped (or NULL for all - * jobs from the context) - * Context: - * The job slot lock must be held when calling this function. - */ -void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, - struct kbase_jd_atom *target_katom); - -/* Object containing callbacks for enabling/disabling protected mode, used - * on GPU which supports protected mode switching natively. - */ -extern struct protected_mode_ops kbase_native_protected_ops; - -#endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_pm.h deleted file mode 100755 index 5bb38872f4e7..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_pm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/** - * @file mali_kbase_hwaccess_pm.h - * HW access power manager common APIs - */ - -#ifndef _KBASE_HWACCESS_PM_H_ -#define _KBASE_HWACCESS_PM_H_ - -#include -#include - -#include - -/* Forward definition - see mali_kbase.h */ -struct kbase_device; - -/* Functions common to all HW access backends */ - -/** - * Initialize the power management framework. - * - * Must be called before any other power management function - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Return: 0 if the power management framework was successfully initialized. - */ -int kbase_hwaccess_pm_early_init(struct kbase_device *kbdev); - -/** - * Initialize the power management framework. 
- * - * Must be called before any other power management function (except - * @ref kbase_hwaccess_pm_early_init) - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * - * Return: 0 if the power management framework was successfully initialized. - */ -int kbase_hwaccess_pm_late_init(struct kbase_device *kbdev); - -/** - * Terminate the power management framework. - * - * No power management functions may be called after this (except - * @ref kbase_pm_init) - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_hwaccess_pm_early_term(struct kbase_device *kbdev); - -/** - * Terminate the power management framework. - * - * No power management functions may be called after this (except - * @ref kbase_hwaccess_pm_early_term or @ref kbase_hwaccess_pm_late_init) - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - */ -void kbase_hwaccess_pm_late_term(struct kbase_device *kbdev); - -/** - * kbase_hwaccess_pm_powerup - Power up the GPU. - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @flags: Flags to pass on to kbase_pm_init_hw - * - * Power up GPU after all modules have been initialized and interrupt handlers - * installed. - * - * Return: 0 if powerup was successful. - */ -int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, - unsigned int flags); - -/** - * Halt the power management framework. - * - * Should ensure that no new interrupts are generated, but allow any currently - * running interrupt handlers to complete successfully. The GPU is forced off by - * the time this function returns, regardless of whether or not the active power - * policy asks for the GPU to be powered off. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); - -/** - * Perform any backend-specific actions to suspend the GPU - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); - -/** - * Perform any backend-specific actions to resume the GPU from a suspend - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); - -/** - * Perform any required actions for activating the GPU. Called when the first - * context goes active. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); - -/** - * Perform any required actions for idling the GPU. Called when the last - * context goes idle. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - */ -void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); - - -/** - * Set the debug core mask. - * - * This determines which cores the power manager is allowed to use. - * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) - * @param new_core_mask_js0 The core mask to use for job slot 0 - * @param new_core_mask_js0 The core mask to use for job slot 1 - * @param new_core_mask_js0 The core mask to use for job slot 2 - */ -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask_js0, u64 new_core_mask_js1, - u64 new_core_mask_js2); - - -/** - * Get the current policy. - * - * Returns the policy that is currently active. 
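Both the core-availability (ca) selectors here and the plain power-policy selectors declared below follow the same get / set / list pattern. A hedged sketch using the plain variants; the enclosing function is invented, and it simply switches to the first policy in the static list before restoring the original one.

/* Hypothetical example of the get/set/list pattern. */
static void example_cycle_pm_policy(struct kbase_device *kbdev)
{
        const struct kbase_pm_policy *const *policies;
        const struct kbase_pm_policy *saved;
        int count = kbase_pm_list_policies(&policies);

        if (count <= 0)
                return;

        saved = kbase_pm_get_policy(kbdev);

        /* Switch to the first available policy, then restore the original. */
        kbase_pm_set_policy(kbdev, policies[0]);
        kbase_pm_set_policy(kbdev, saved);
}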
- * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * - * @return The current policy - */ -const struct kbase_pm_ca_policy -*kbase_pm_ca_get_policy(struct kbase_device *kbdev); - -/** - * Change the policy to the one specified. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param policy The policy to change to (valid pointer returned from - * @ref kbase_pm_ca_list_policies) - */ -void kbase_pm_ca_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_ca_policy *policy); - -/** - * Retrieve a static list of the available policies. - * - * @param[out] policies An array pointer to take the list of policies. This may - * be NULL. The contents of this array must not be - * modified. - * - * @return The number of policies - */ -int -kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); - - -/** - * Get the current policy. - * - * Returns the policy that is currently active. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * - * @return The current policy - */ -const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); - -/** - * Change the policy to the one specified. - * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param policy The policy to change to (valid pointer returned from - * @ref kbase_pm_list_policies) - */ -void kbase_pm_set_policy(struct kbase_device *kbdev, - const struct kbase_pm_policy *policy); - -/** - * Retrieve a static list of the available policies. - * - * @param[out] policies An array pointer to take the list of policies. This may - * be NULL. The contents of this array must not be - * modified. - * - * @return The number of policies - */ -int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies); - -#endif /* _KBASE_HWACCESS_PM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_time.h deleted file mode 100755 index f7539f5b46c6..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwaccess_time.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/** - * - */ - -#ifndef _KBASE_BACKEND_TIME_H_ -#define _KBASE_BACKEND_TIME_H_ - -/** - * kbase_backend_get_gpu_time() - Get current GPU time - * @kbdev: Device pointer - * @cycle_counter: Pointer to u64 to store cycle counter in - * @system_time: Pointer to u64 to store system time in - * @ts: Pointer to struct timespec to store current monotonic - * time in - */ -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec *ts); - -/** - * kbase_wait_write_flush() - Wait for GPU write flush - * @kbdev: Kbase device - * - * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush - * its write buffer. - * - * If GPU resets occur then the counters are reset to zero, the delay may not be - * as expected. - * - * This function is only in use for BASE_HW_ISSUE_6367 - */ -#ifdef CONFIG_MALI_NO_MALI -static inline void kbase_wait_write_flush(struct kbase_device *kbdev) -{ -} -#else -void kbase_wait_write_flush(struct kbase_device *kbdev); -#endif - -#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt.c deleted file mode 100644 index efbac6fc8cd6..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt.c +++ /dev/null @@ -1,796 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Implementation of hardware counter context and accumulator APIs. - */ - -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_accumulator.h" -#include "mali_kbase_hwcnt_backend.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_malisw.h" -#include "mali_kbase_debug.h" -#include "mali_kbase_linux.h" - -#include -#include -#include - -/** - * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. - * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. - * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. - * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are - * any enabled counters. - */ -enum kbase_hwcnt_accum_state { - ACCUM_STATE_ERROR, - ACCUM_STATE_DISABLED, - ACCUM_STATE_ENABLED -}; - -/** - * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. - * @backend: Pointer to created counter backend. - * @state: The current state of the accumulator. - * - State transition from disabled->enabled or - * disabled->error requires state_lock. - * - State transition from enabled->disabled or - * enabled->error requires both accum_lock and - * state_lock. - * - Error state persists until next disable. - * @enable_map: The current set of enabled counters. 
- * - Must only be modified while holding both - * accum_lock and state_lock. - * - Can be read while holding either lock. - * - Must stay in sync with enable_map_any_enabled. - * @enable_map_any_enabled: True if any counters in the map are enabled, else - * false. If true, and state is ACCUM_STATE_ENABLED, - * then the counter backend will be enabled. - * - Must only be modified while holding both - * accum_lock and state_lock. - * - Can be read while holding either lock. - * - Must stay in sync with enable_map. - * @scratch_map: Scratch enable map, used as temporary enable map - * storage during dumps. - * - Must only be read or modified while holding - * accum_lock. - * @accum_buf: Accumulation buffer, where dumps will be accumulated - * into on transition to a disable state. - * - Must only be read or modified while holding - * accum_lock. - * @accumulated: True if the accumulation buffer has been accumulated - * into and not subsequently read from yet, else false. - * - Must only be read or modified while holding - * accum_lock. - * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent - * dump that was requested by the user. - * - Must only be read or modified while holding - * accum_lock. - */ -struct kbase_hwcnt_accumulator { - struct kbase_hwcnt_backend *backend; - enum kbase_hwcnt_accum_state state; - struct kbase_hwcnt_enable_map enable_map; - bool enable_map_any_enabled; - struct kbase_hwcnt_enable_map scratch_map; - struct kbase_hwcnt_dump_buffer accum_buf; - bool accumulated; - u64 ts_last_dump_ns; -}; - -/** - * struct kbase_hwcnt_context - Hardware counter context structure. - * @iface: Pointer to hardware counter backend interface. - * @state_lock: Spinlock protecting state. - * @disable_count: Disable count of the context. Initialised to 1. - * Decremented when the accumulator is acquired, and incremented - * on release. Incremented on calls to - * kbase_hwcnt_context_disable[_atomic], and decremented on - * calls to kbase_hwcnt_context_enable. - * - Must only be read or modified while holding state_lock. - * @accum_lock: Mutex protecting accumulator. - * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or - * termination. Set to true before accumulator initialisation, - * and false after accumulator termination. - * - Must only be modified while holding both accum_lock and - * state_lock. - * - Can be read while holding either lock. - * @accum: Hardware counter accumulator structure. 
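A brief sketch of creating and destroying a counter context around a backend interface, matching the kbase_hwcnt_context_init()/kbase_hwcnt_context_term() pair defined just below. The caller and the comment about where @iface comes from are assumptions; the interface pointer must outlive the context it creates.

/* Hypothetical caller; @iface is whichever backend interface (for example
 * the GPU backend) the caller has already set up.
 */
static int example_create_hwcnt_context(
        const struct kbase_hwcnt_backend_interface *iface,
        struct kbase_hwcnt_context **out_hctx)
{
        int err = kbase_hwcnt_context_init(iface, out_hctx);

        if (err)
                return err;     /* -EINVAL or -ENOMEM, per the code below */

        /* ... acquire the accumulator and perform dumps here ... */

        kbase_hwcnt_context_term(*out_hctx);
        *out_hctx = NULL;
        return 0;
}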
- */ -struct kbase_hwcnt_context { - const struct kbase_hwcnt_backend_interface *iface; - spinlock_t state_lock; - size_t disable_count; - struct mutex accum_lock; - bool accum_inited; - struct kbase_hwcnt_accumulator accum; -}; - -int kbase_hwcnt_context_init( - const struct kbase_hwcnt_backend_interface *iface, - struct kbase_hwcnt_context **out_hctx) -{ - struct kbase_hwcnt_context *hctx = NULL; - - if (!iface || !out_hctx) - return -EINVAL; - - hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); - if (!hctx) - return -ENOMEM; - - hctx->iface = iface; - spin_lock_init(&hctx->state_lock); - hctx->disable_count = 1; - mutex_init(&hctx->accum_lock); - hctx->accum_inited = false; - - *out_hctx = hctx; - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); - -void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) -{ - if (!hctx) - return; - - /* Make sure we didn't leak the accumulator */ - WARN_ON(hctx->accum_inited); - kfree(hctx); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); - -/** - * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. - * @hctx: Non-NULL pointer to hardware counter context. - */ -static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) -{ - WARN_ON(!hctx); - WARN_ON(!hctx->accum_inited); - - kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); - kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); - kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); - hctx->iface->term(hctx->accum.backend); - memset(&hctx->accum, 0, sizeof(hctx->accum)); -} - -/** - * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. - * @hctx: Non-NULL pointer to hardware counter context. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) -{ - int errcode; - - WARN_ON(!hctx); - WARN_ON(!hctx->accum_inited); - - errcode = hctx->iface->init( - hctx->iface->info, &hctx->accum.backend); - if (errcode) - goto error; - - hctx->accum.state = ACCUM_STATE_ERROR; - - errcode = kbase_hwcnt_enable_map_alloc( - hctx->iface->metadata, &hctx->accum.enable_map); - if (errcode) - goto error; - - hctx->accum.enable_map_any_enabled = false; - - errcode = kbase_hwcnt_dump_buffer_alloc( - hctx->iface->metadata, &hctx->accum.accum_buf); - if (errcode) - goto error; - - errcode = kbase_hwcnt_enable_map_alloc( - hctx->iface->metadata, &hctx->accum.scratch_map); - if (errcode) - goto error; - - hctx->accum.accumulated = false; - - hctx->accum.ts_last_dump_ns = - hctx->iface->timestamp_ns(hctx->accum.backend); - - return 0; - -error: - kbasep_hwcnt_accumulator_term(hctx); - return errcode; -} - -/** - * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the - * disabled state, from the enabled or - * error states. - * @hctx: Non-NULL pointer to hardware counter context. - * @accumulate: True if we should accumulate before disabling, else false. 
- */ -static void kbasep_hwcnt_accumulator_disable( - struct kbase_hwcnt_context *hctx, bool accumulate) -{ - int errcode = 0; - bool backend_enabled = false; - struct kbase_hwcnt_accumulator *accum; - unsigned long flags; - - WARN_ON(!hctx); - lockdep_assert_held(&hctx->accum_lock); - WARN_ON(!hctx->accum_inited); - - accum = &hctx->accum; - - spin_lock_irqsave(&hctx->state_lock, flags); - - WARN_ON(hctx->disable_count != 0); - WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); - - if ((hctx->accum.state == ACCUM_STATE_ENABLED) && - (accum->enable_map_any_enabled)) - backend_enabled = true; - - if (!backend_enabled) - hctx->accum.state = ACCUM_STATE_DISABLED; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - - /* Early out if the backend is not already enabled */ - if (!backend_enabled) - return; - - if (!accumulate) - goto disable; - - /* Try and accumulate before disabling */ - errcode = hctx->iface->dump_request(accum->backend); - if (errcode) - goto disable; - - errcode = hctx->iface->dump_wait(accum->backend); - if (errcode) - goto disable; - - errcode = hctx->iface->dump_get(accum->backend, - &accum->accum_buf, &accum->enable_map, accum->accumulated); - if (errcode) - goto disable; - - accum->accumulated = true; - -disable: - hctx->iface->dump_disable(accum->backend); - - /* Regardless of any errors during the accumulate, put the accumulator - * in the disabled state. - */ - spin_lock_irqsave(&hctx->state_lock, flags); - - hctx->accum.state = ACCUM_STATE_DISABLED; - - spin_unlock_irqrestore(&hctx->state_lock, flags); -} - -/** - * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the - * enabled state, from the disabled state. - * @hctx: Non-NULL pointer to hardware counter context. - */ -static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) -{ - int errcode = 0; - struct kbase_hwcnt_accumulator *accum; - - WARN_ON(!hctx); - lockdep_assert_held(&hctx->state_lock); - WARN_ON(!hctx->accum_inited); - WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); - - accum = &hctx->accum; - - /* The backend only needs enabling if any counters are enabled */ - if (accum->enable_map_any_enabled) - errcode = hctx->iface->dump_enable_nolock( - accum->backend, &accum->enable_map); - - if (!errcode) - accum->state = ACCUM_STATE_ENABLED; - else - accum->state = ACCUM_STATE_ERROR; -} - -/** - * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date - * values of enabled counters possible, and - * optionally update the set of enabled - * counters. - * @hctx : Non-NULL pointer to the hardware counter context - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success - * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * @new_map: Pointer to the new counter enable map. If non-NULL, must have - * the same metadata as the accumulator. If NULL, the set of - * enabled counters will be unchanged. 
- */ -static int kbasep_hwcnt_accumulator_dump( - struct kbase_hwcnt_context *hctx, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf, - const struct kbase_hwcnt_enable_map *new_map) -{ - int errcode = 0; - unsigned long flags; - enum kbase_hwcnt_accum_state state; - bool dump_requested = false; - bool dump_written = false; - bool cur_map_any_enabled; - struct kbase_hwcnt_enable_map *cur_map; - bool new_map_any_enabled = false; - u64 dump_time_ns; - struct kbase_hwcnt_accumulator *accum; - - WARN_ON(!hctx); - WARN_ON(!ts_start_ns); - WARN_ON(!ts_end_ns); - WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata)); - WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata)); - WARN_ON(!hctx->accum_inited); - lockdep_assert_held(&hctx->accum_lock); - - accum = &hctx->accum; - cur_map = &accum->scratch_map; - - /* Save out info about the current enable map */ - cur_map_any_enabled = accum->enable_map_any_enabled; - kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); - - if (new_map) - new_map_any_enabled = - kbase_hwcnt_enable_map_any_enabled(new_map); - - /* - * We're holding accum_lock, so the accumulator state might transition - * from disabled to enabled during this function (as enabling is lock - * free), but it will never disable (as disabling needs to hold the - * accum_lock), nor will it ever transition from enabled to error (as - * an enable while we're already enabled is impossible). - * - * If we're already disabled, we'll only look at the accumulation buffer - * rather than do a real dump, so a concurrent enable does not affect - * us. - * - * If a concurrent enable fails, we might transition to the error - * state, but again, as we're only looking at the accumulation buffer, - * it's not an issue. - */ - spin_lock_irqsave(&hctx->state_lock, flags); - - state = accum->state; - - /* - * Update the new map now, such that if an enable occurs during this - * dump then that enable will set the new map. If we're already enabled, - * then we'll do it ourselves after the dump. - */ - if (new_map) { - kbase_hwcnt_enable_map_copy( - &accum->enable_map, new_map); - accum->enable_map_any_enabled = new_map_any_enabled; - } - - spin_unlock_irqrestore(&hctx->state_lock, flags); - - /* Error state, so early out. No need to roll back any map updates */ - if (state == ACCUM_STATE_ERROR) - return -EIO; - - /* Initiate the dump if the backend is enabled. */ - if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { - /* Disable pre-emption, to make the timestamp as accurate as - * possible. - */ - preempt_disable(); - { - dump_time_ns = hctx->iface->timestamp_ns( - accum->backend); - if (dump_buf) { - errcode = hctx->iface->dump_request( - accum->backend); - dump_requested = true; - } else { - errcode = hctx->iface->dump_clear( - accum->backend); - } - } - preempt_enable(); - if (errcode) - goto error; - } else { - dump_time_ns = hctx->iface->timestamp_ns(accum->backend); - } - - /* Copy any accumulation into the dest buffer */ - if (accum->accumulated && dump_buf) { - kbase_hwcnt_dump_buffer_copy( - dump_buf, &accum->accum_buf, cur_map); - dump_written = true; - } - - /* Wait for any requested dumps to complete */ - if (dump_requested) { - WARN_ON(state != ACCUM_STATE_ENABLED); - errcode = hctx->iface->dump_wait(accum->backend); - if (errcode) - goto error; - } - - /* If we're enabled and there's a new enable map, change the enabled set - * as soon after the dump has completed as possible. 
- */ - if ((state == ACCUM_STATE_ENABLED) && new_map) { - /* Backend is only enabled if there were any enabled counters */ - if (cur_map_any_enabled) - hctx->iface->dump_disable(accum->backend); - - /* (Re-)enable the backend if the new map has enabled counters. - * No need to acquire the spinlock, as concurrent enable while - * we're already enabled and holding accum_lock is impossible. - */ - if (new_map_any_enabled) { - errcode = hctx->iface->dump_enable( - accum->backend, new_map); - if (errcode) - goto error; - } - } - - /* Copy, accumulate, or zero into the dest buffer to finish */ - if (dump_buf) { - /* If we dumped, copy or accumulate it into the destination */ - if (dump_requested) { - WARN_ON(state != ACCUM_STATE_ENABLED); - errcode = hctx->iface->dump_get( - accum->backend, - dump_buf, - cur_map, - dump_written); - if (errcode) - goto error; - dump_written = true; - } - - /* If we've not written anything into the dump buffer so far, it - * means there was nothing to write. Zero any enabled counters. - */ - if (!dump_written) - kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); - } - - /* Write out timestamps */ - *ts_start_ns = accum->ts_last_dump_ns; - *ts_end_ns = dump_time_ns; - - accum->accumulated = false; - accum->ts_last_dump_ns = dump_time_ns; - - return 0; -error: - /* An error was only physically possible if the backend was enabled */ - WARN_ON(state != ACCUM_STATE_ENABLED); - - /* Disable the backend, and transition to the error state */ - hctx->iface->dump_disable(accum->backend); - spin_lock_irqsave(&hctx->state_lock, flags); - - accum->state = ACCUM_STATE_ERROR; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - - return errcode; -} - -/** - * kbasep_hwcnt_context_disable() - Increment the disable count of the context. - * @hctx: Non-NULL pointer to hardware counter context. - * @accumulate: True if we should accumulate before disabling, else false. - */ -static void kbasep_hwcnt_context_disable( - struct kbase_hwcnt_context *hctx, bool accumulate) -{ - unsigned long flags; - - WARN_ON(!hctx); - lockdep_assert_held(&hctx->accum_lock); - - if (!kbase_hwcnt_context_disable_atomic(hctx)) { - kbasep_hwcnt_accumulator_disable(hctx, accumulate); - - spin_lock_irqsave(&hctx->state_lock, flags); - - /* Atomic disable failed and we're holding the mutex, so current - * disable count must be 0. 
- */ - WARN_ON(hctx->disable_count != 0); - hctx->disable_count++; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - } -} - -int kbase_hwcnt_accumulator_acquire( - struct kbase_hwcnt_context *hctx, - struct kbase_hwcnt_accumulator **accum) -{ - int errcode = 0; - unsigned long flags; - - if (!hctx || !accum) - return -EINVAL; - - mutex_lock(&hctx->accum_lock); - spin_lock_irqsave(&hctx->state_lock, flags); - - if (!hctx->accum_inited) - /* Set accum initing now to prevent concurrent init */ - hctx->accum_inited = true; - else - /* Already have an accum, or already being inited */ - errcode = -EBUSY; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - mutex_unlock(&hctx->accum_lock); - - if (errcode) - return errcode; - - errcode = kbasep_hwcnt_accumulator_init(hctx); - - if (errcode) { - mutex_lock(&hctx->accum_lock); - spin_lock_irqsave(&hctx->state_lock, flags); - - hctx->accum_inited = false; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - mutex_unlock(&hctx->accum_lock); - - return errcode; - } - - spin_lock_irqsave(&hctx->state_lock, flags); - - WARN_ON(hctx->disable_count == 0); - WARN_ON(hctx->accum.enable_map_any_enabled); - - /* Decrement the disable count to allow the accumulator to be accessible - * now that it's fully constructed. - */ - hctx->disable_count--; - - /* - * Make sure the accumulator is initialised to the correct state. - * Regardless of initial state, counters don't need to be enabled via - * the backend, as the initial enable map has no enabled counters. - */ - hctx->accum.state = (hctx->disable_count == 0) ? - ACCUM_STATE_ENABLED : - ACCUM_STATE_DISABLED; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - - *accum = &hctx->accum; - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire); - -void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) -{ - unsigned long flags; - struct kbase_hwcnt_context *hctx; - - if (!accum) - return; - - hctx = container_of(accum, struct kbase_hwcnt_context, accum); - - mutex_lock(&hctx->accum_lock); - - /* Double release is a programming error */ - WARN_ON(!hctx->accum_inited); - - /* Disable the context to ensure the accumulator is inaccesible while - * we're destroying it. This performs the corresponding disable count - * increment to the decrement done during acquisition. - */ - kbasep_hwcnt_context_disable(hctx, false); - - mutex_unlock(&hctx->accum_lock); - - kbasep_hwcnt_accumulator_term(hctx); - - mutex_lock(&hctx->accum_lock); - spin_lock_irqsave(&hctx->state_lock, flags); - - hctx->accum_inited = false; - - spin_unlock_irqrestore(&hctx->state_lock, flags); - mutex_unlock(&hctx->accum_lock); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release); - -void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) -{ - if (WARN_ON(!hctx)) - return; - - /* Try and atomically disable first, so we can avoid locking the mutex - * if we don't need to. - */ - if (kbase_hwcnt_context_disable_atomic(hctx)) - return; - - mutex_lock(&hctx->accum_lock); - - kbasep_hwcnt_context_disable(hctx, true); - - mutex_unlock(&hctx->accum_lock); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable); - -bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) -{ - unsigned long flags; - bool atomic_disabled = false; - - if (WARN_ON(!hctx)) - return false; - - spin_lock_irqsave(&hctx->state_lock, flags); - - if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { - /* - * If disable count is non-zero or no counters are enabled, we - * can just bump the disable count. 
- * - * Otherwise, we can't disable in an atomic context. - */ - if (hctx->disable_count != 0) { - hctx->disable_count++; - atomic_disabled = true; - } else { - WARN_ON(!hctx->accum_inited); - if (!hctx->accum.enable_map_any_enabled) { - hctx->disable_count++; - hctx->accum.state = ACCUM_STATE_DISABLED; - atomic_disabled = true; - } - } - } - - spin_unlock_irqrestore(&hctx->state_lock, flags); - - return atomic_disabled; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic); - -void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) -{ - unsigned long flags; - - if (WARN_ON(!hctx)) - return; - - spin_lock_irqsave(&hctx->state_lock, flags); - - if (!WARN_ON(hctx->disable_count == 0)) { - if (hctx->disable_count == 1) - kbasep_hwcnt_accumulator_enable(hctx); - - hctx->disable_count--; - } - - spin_unlock_irqrestore(&hctx->state_lock, flags); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable); - -const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( - struct kbase_hwcnt_context *hctx) -{ - if (!hctx) - return NULL; - - return hctx->iface->metadata; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata); - -int kbase_hwcnt_accumulator_set_counters( - struct kbase_hwcnt_accumulator *accum, - const struct kbase_hwcnt_enable_map *new_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - int errcode; - struct kbase_hwcnt_context *hctx; - - if (!accum || !new_map || !ts_start_ns || !ts_end_ns) - return -EINVAL; - - hctx = container_of(accum, struct kbase_hwcnt_context, accum); - - if ((new_map->metadata != hctx->iface->metadata) || - (dump_buf && (dump_buf->metadata != hctx->iface->metadata))) - return -EINVAL; - - mutex_lock(&hctx->accum_lock); - - errcode = kbasep_hwcnt_accumulator_dump( - hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); - - mutex_unlock(&hctx->accum_lock); - - return errcode; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters); - -int kbase_hwcnt_accumulator_dump( - struct kbase_hwcnt_accumulator *accum, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - int errcode; - struct kbase_hwcnt_context *hctx; - - if (!accum || !ts_start_ns || !ts_end_ns) - return -EINVAL; - - hctx = container_of(accum, struct kbase_hwcnt_context, accum); - - if (dump_buf && (dump_buf->metadata != hctx->iface->metadata)) - return -EINVAL; - - mutex_lock(&hctx->accum_lock); - - errcode = kbasep_hwcnt_accumulator_dump( - hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); - - mutex_unlock(&hctx->accum_lock); - - return errcode; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_accumulator.h deleted file mode 100644 index fc45743e264c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_accumulator.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Hardware counter accumulator API. - */ - -#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ -#define _KBASE_HWCNT_ACCUMULATOR_H_ - -#include - -struct kbase_hwcnt_context; -struct kbase_hwcnt_accumulator; -struct kbase_hwcnt_enable_map; -struct kbase_hwcnt_dump_buffer; - -/** - * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator - * for a hardware counter context. - * @hctx: Non-NULL pointer to a hardware counter context. - * @accum: Non-NULL pointer to where the pointer to the created accumulator - * will be stored on success. - * - * There can exist at most one instance of the hardware counter accumulator per - * context at a time. - * - * If multiple clients need access to the hardware counters at the same time, - * then an abstraction built on top of the single instance to the hardware - * counter accumulator is required. - * - * No counters will be enabled with the returned accumulator. A subsequent call - * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. - * - * There are four components to a hardware counter dump: - * - A set of enabled counters - * - A start time - * - An end time - * - A dump buffer containing the accumulated counter values for all enabled - * counters between the start and end times. - * - * For each dump, it is guaranteed that all enabled counters were active for the - * entirety of the period between the start and end times. - * - * It is also guaranteed that the start time of dump "n" is always equal to the - * end time of dump "n - 1". - * - * For all dumps, the values of any counters that were not enabled is undefined. - * - * Return: 0 on success or error code. - */ -int kbase_hwcnt_accumulator_acquire( - struct kbase_hwcnt_context *hctx, - struct kbase_hwcnt_accumulator **accum); - -/** - * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. - * @accum: Non-NULL pointer to the hardware counter accumulator. - * - * The accumulator must be released before the context the accumulator was - * created from is terminated. - */ -void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); - -/** - * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently - * enabled counters, and enable a new - * set of counters that will be used - * for subsequent dumps. - * @accum: Non-NULL pointer to the hardware counter accumulator. - * @new_map: Non-NULL pointer to the new counter enable map. Must have the - * same metadata as the accumulator. - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success. - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success. - * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * - * If this function fails for some unexpected reason (i.e. anything other than - * invalid args), then the accumulator will be put into the error state until - * the parent context is next disabled. - * - * Return: 0 on success or error code. 
- */ -int kbase_hwcnt_accumulator_set_counters( - struct kbase_hwcnt_accumulator *accum, - const struct kbase_hwcnt_enable_map *new_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); - -/** - * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled - * counters. - * @accum: Non-NULL pointer to the hardware counter accumulator. - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success. - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success. - * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * - * If this function fails for some unexpected reason (i.e. anything other than - * invalid args), then the accumulator will be put into the error state until - * the parent context is next disabled. - * - * Return: 0 on success or error code. - */ -int kbase_hwcnt_accumulator_dump( - struct kbase_hwcnt_accumulator *accum, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); - -#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend.h deleted file mode 100644 index b7aa0e1fa8e9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Virtual interface for hardware counter backends. - */ - -#ifndef _KBASE_HWCNT_BACKEND_H_ -#define _KBASE_HWCNT_BACKEND_H_ - -#include - -struct kbase_hwcnt_metadata; -struct kbase_hwcnt_enable_map; -struct kbase_hwcnt_dump_buffer; - -/* - * struct kbase_hwcnt_backend_info - Opaque pointer to information used to - * create an instance of a hardware counter - * backend. - */ -struct kbase_hwcnt_backend_info; - -/* - * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter - * backend, used to perform dumps. - */ -struct kbase_hwcnt_backend; - -/** - * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. - * @info: Non-NULL pointer to backend info. - * @out_backend: Non-NULL pointer to where backend is stored on success. - * - * All uses of the created hardware counter backend must be externally - * synchronised. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_init_fn)( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend); - -/** - * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. - * @backend: Pointer to backend to be terminated. 
- */ -typedef void (*kbase_hwcnt_backend_term_fn)( - struct kbase_hwcnt_backend *backend); - -/** - * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend - * timestamp. - * @backend: Non-NULL pointer to backend. - * - * Return: Backend timestamp in nanoseconds. - */ -typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( - struct kbase_hwcnt_backend *backend); - -/** - * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the - * backend. - * @backend: Non-NULL pointer to backend. - * @enable_map: Non-NULL pointer to enable map specifying enabled counters. - * - * The enable_map must have been created using the interface's metadata. - * If the backend has already been enabled, an error is returned. - * - * May be called in an atomic context. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_dump_enable_fn)( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map); - -/** - * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping - * with the backend. - * @backend: Non-NULL pointer to backend. - * @enable_map: Non-NULL pointer to enable map specifying enabled counters. - * - * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be - * called in an atomic context with the spinlock documented by the specific - * backend interface held. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map); - -/** - * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with - * the backend. - * @backend: Non-NULL pointer to backend. - * - * If the backend is already disabled, does nothing. - * Any undumped counter values since the last dump get will be lost. - */ -typedef void (*kbase_hwcnt_backend_dump_disable_fn)( - struct kbase_hwcnt_backend *backend); - -/** - * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped - * counters. - * @backend: Non-NULL pointer to backend. - * - * If the backend is not enabled, returns an error. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_dump_clear_fn)( - struct kbase_hwcnt_backend *backend); - -/** - * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter - * dump. - * @backend: Non-NULL pointer to backend. - * - * If the backend is not enabled or another dump is already in progress, - * returns an error. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_dump_request_fn)( - struct kbase_hwcnt_backend *backend); - -/** - * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested - * counter dump has completed. - * @backend: Non-NULL pointer to backend. - * - * If the backend is not enabled, returns an error. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_dump_wait_fn)( - struct kbase_hwcnt_backend *backend); - -/** - * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the - * counters dumped after the last dump - * request into the dump buffer. - * @backend: Non-NULL pointer to backend. - * @dump_buffer: Non-NULL pointer to destination dump buffer. - * @enable_map: Non-NULL pointer to enable map specifying enabled values. - * @accumulate: True if counters should be accumulated into dump_buffer, rather - * than copied. - * - * If the backend is not enabled, returns an error. 
- * If a dump is in progress (i.e. dump_wait has not yet returned successfully) - * then the resultant contents of the dump buffer will be undefined. - * - * Return: 0 on success, else error code. - */ -typedef int (*kbase_hwcnt_backend_dump_get_fn)( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dump_buffer, - const struct kbase_hwcnt_enable_map *enable_map, - bool accumulate); - -/** - * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual - * interface. - * @metadata: Immutable hardware counter metadata. - * @info: Immutable info used to initialise an instance of the - * backend. - * @init: Function ptr to initialise an instance of the backend. - * @term: Function ptr to terminate an instance of the backend. - * @timestamp_ns: Function ptr to get the current backend timestamp. - * @dump_enable: Function ptr to enable dumping. - * @dump_enable_nolock: Function ptr to enable dumping while the - * backend-specific spinlock is already held. - * @dump_disable: Function ptr to disable dumping. - * @dump_clear: Function ptr to clear counters. - * @dump_request: Function ptr to request a dump. - * @dump_wait: Function ptr to wait until dump to complete. - * @dump_get: Function ptr to copy or accumulate dump into a dump - * buffer. - */ -struct kbase_hwcnt_backend_interface { - const struct kbase_hwcnt_metadata *metadata; - const struct kbase_hwcnt_backend_info *info; - kbase_hwcnt_backend_init_fn init; - kbase_hwcnt_backend_term_fn term; - kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; - kbase_hwcnt_backend_dump_enable_fn dump_enable; - kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; - kbase_hwcnt_backend_dump_disable_fn dump_disable; - kbase_hwcnt_backend_dump_clear_fn dump_clear; - kbase_hwcnt_backend_dump_request_fn dump_request; - kbase_hwcnt_backend_dump_wait_fn dump_wait; - kbase_hwcnt_backend_dump_get_fn dump_get; -}; - -#endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend_gpu.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend_gpu.c deleted file mode 100644 index 4bc8916922b9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend_gpu.c +++ /dev/null @@ -1,538 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_backend_gpu.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" -#include "mali_kbase_pm_policy.h" -#include "mali_kbase_hwaccess_instr.h" -#include "mali_kbase_tlstream.h" -#ifdef CONFIG_MALI_NO_MALI -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif - -/** - * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance - * of a GPU hardware counter backend. - * @kbdev: KBase device. 
- * @use_secondary: True if secondary performance counters should be used, - * else false. Ignored if secondary counters are not supported. - * @metadata: Hardware counter metadata. - * @dump_bytes: Bytes of GPU memory required to perform a - * hardware counter dump. - */ -struct kbase_hwcnt_backend_gpu_info { - struct kbase_device *kbdev; - bool use_secondary; - const struct kbase_hwcnt_metadata *metadata; - size_t dump_bytes; -}; - -/** - * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend. - * @info: Info used to create the backend. - * @kctx: KBase context used for GPU memory allocation and - * counter dumping. - * @kctx_element: List element used to add kctx to device context list. - * @gpu_dump_va: GPU hardware counter dump buffer virtual address. - * @cpu_dump_va: CPU mapping of gpu_dump_va. - * @vmap: Dump buffer vmap. - * @enabled: True if dumping has been enabled, else false. - */ -struct kbase_hwcnt_backend_gpu { - const struct kbase_hwcnt_backend_gpu_info *info; - struct kbase_context *kctx; - struct kbasep_kctx_list_element *kctx_element; - u64 gpu_dump_va; - void *cpu_dump_va; - struct kbase_vmap_struct *vmap; - bool enabled; -}; - -/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( - struct kbase_hwcnt_backend *backend) -{ - struct timespec ts; - - (void)backend; - getrawmonotonic(&ts); - return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - struct kbase_context *kctx; - struct kbase_device *kbdev; - struct kbase_hwcnt_physical_enable_map phys; - struct kbase_instr_hwcnt_enable enable; - - if (!backend_gpu || !enable_map || backend_gpu->enabled || - (enable_map->metadata != backend_gpu->info->metadata)) - return -EINVAL; - - kctx = backend_gpu->kctx; - kbdev = backend_gpu->kctx->kbdev; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); - - enable.jm_bm = phys.jm_bm; - enable.shader_bm = phys.shader_bm; - enable.tiler_bm = phys.tiler_bm; - enable.mmu_l2_bm = phys.mmu_l2_bm; - enable.use_secondary = backend_gpu->info->use_secondary; - enable.dump_buffer = backend_gpu->gpu_dump_va; - enable.dump_buffer_bytes = backend_gpu->info->dump_bytes; - - errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); - if (errcode) - goto error; - - backend_gpu->enabled = true; - - return 0; -error: - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -static int kbasep_hwcnt_backend_gpu_dump_enable( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - unsigned long flags; - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - struct kbase_device *kbdev; - - if (!backend_gpu) - return -EINVAL; - - kbdev = backend_gpu->kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock( - backend, enable_map); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void 
kbasep_hwcnt_backend_gpu_dump_disable( - struct kbase_hwcnt_backend *backend) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (WARN_ON(!backend_gpu) || !backend_gpu->enabled) - return; - - errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx); - WARN_ON(errcode); - - backend_gpu->enabled = false; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -static int kbasep_hwcnt_backend_gpu_dump_clear( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_clear(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */ -static int kbasep_hwcnt_backend_gpu_dump_request( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_request_dump(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -static int kbasep_hwcnt_backend_gpu_dump_wait( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */ -static int kbasep_hwcnt_backend_gpu_dump_get( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !dst || !dst_enable_map || - (backend_gpu->info->metadata != dst->metadata) || - (dst_enable_map->metadata != dst->metadata)) - return -EINVAL; - - /* Invalidate the kernel buffer before reading from it. */ - kbase_sync_mem_regions( - backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); - - return kbase_hwcnt_gpu_dump_get( - dst, backend_gpu->cpu_dump_va, dst_enable_map, accumulate); -} - -/** - * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer. - * @info: Non-NULL pointer to GPU backend info. - * @kctx: Non-NULL pointer to kbase context. - * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address - * is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_dump_alloc( - const struct kbase_hwcnt_backend_gpu_info *info, - struct kbase_context *kctx, - u64 *gpu_dump_va) -{ - struct kbase_va_region *reg; - u64 flags; - u64 nr_pages; - - WARN_ON(!info); - WARN_ON(!kctx); - WARN_ON(!gpu_dump_va); - - flags = BASE_MEM_PROT_CPU_RD | - BASE_MEM_PROT_GPU_WR | - BASE_MEM_PERMANENT_KERNEL_MAPPING | - BASE_MEM_CACHED_CPU; - - if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) - flags |= BASE_MEM_UNCACHED_GPU; - - nr_pages = PFN_UP(info->dump_bytes); - - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); - - if (!reg) - return -ENOMEM; - - return 0; -} - -/** - * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer. - * @kctx: Non-NULL pointer to kbase context. - * @gpu_dump_va: GPU dump buffer virtual address. 
- */ -static void kbasep_hwcnt_backend_gpu_dump_free( - struct kbase_context *kctx, - u64 gpu_dump_va) -{ - WARN_ON(!kctx); - if (gpu_dump_va) - kbase_mem_free(kctx, gpu_dump_va); -} - -/** - * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend. - * @backend: Pointer to GPU backend to destroy. - * - * Can be safely called on a backend in any state of partial construction. - */ -static void kbasep_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_gpu *backend) -{ - if (!backend) - return; - - if (backend->kctx) { - struct kbase_context *kctx = backend->kctx; - struct kbase_device *kbdev = kctx->kbdev; - - if (backend->cpu_dump_va) - kbase_phy_alloc_mapping_put(kctx, backend->vmap); - - if (backend->gpu_dump_va) - kbasep_hwcnt_backend_gpu_dump_free( - kctx, backend->gpu_dump_va); - - if (backend->kctx_element) { - mutex_lock(&kbdev->kctx_list_lock); - - KBASE_TLSTREAM_TL_DEL_CTX(kctx); - list_del(&backend->kctx_element->link); - - mutex_unlock(&kbdev->kctx_list_lock); - kfree(backend->kctx_element); - } - - kbasep_js_release_privileged_ctx(kbdev, kctx); - kbase_destroy_context(kctx); - } - - kfree(backend); -} - -/** - * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend. - * @info: Non-NULL pointer to backend info. - * @out_backend: Non-NULL pointer to where backend is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_create( - const struct kbase_hwcnt_backend_gpu_info *info, - struct kbase_hwcnt_backend_gpu **out_backend) -{ - int errcode; - struct kbase_device *kbdev; - struct kbase_hwcnt_backend_gpu *backend = NULL; - - WARN_ON(!info); - WARN_ON(!out_backend); - - kbdev = info->kbdev; - - backend = kzalloc(sizeof(*backend), GFP_KERNEL); - if (!backend) - goto alloc_error; - - backend->info = info; - - backend->kctx = kbase_create_context(kbdev, true); - if (!backend->kctx) - goto alloc_error; - - kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); - - backend->kctx_element = kzalloc( - sizeof(*backend->kctx_element), GFP_KERNEL); - if (!backend->kctx_element) - goto alloc_error; - - backend->kctx_element->kctx = backend->kctx; - - /* Add kernel context to list of contexts associated with device. */ - mutex_lock(&kbdev->kctx_list_lock); - - list_add(&backend->kctx_element->link, &kbdev->kctx_list); - /* Fire tracepoint while lock is held, to ensure tracepoint is not - * created in both body and summary stream - */ - KBASE_TLSTREAM_TL_NEW_CTX( - backend->kctx, backend->kctx->id, (u32)(backend->kctx->tgid)); - - mutex_unlock(&kbdev->kctx_list_lock); - - errcode = kbasep_hwcnt_backend_gpu_dump_alloc( - info, backend->kctx, &backend->gpu_dump_va); - if (errcode) - goto error; - - backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, - backend->gpu_dump_va, &backend->vmap); - if (!backend->cpu_dump_va) - goto alloc_error; - -#ifdef CONFIG_MALI_NO_MALI - /* The dummy model needs the CPU mapping. 
*/ - gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); -#endif - - *out_backend = backend; - return 0; - -alloc_error: - errcode = -ENOMEM; -error: - kbasep_hwcnt_backend_gpu_destroy(backend); - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_init_fn */ -static int kbasep_hwcnt_backend_gpu_init( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend = NULL; - - if (!info || !out_backend) - return -EINVAL; - - errcode = kbasep_hwcnt_backend_gpu_create( - (const struct kbase_hwcnt_backend_gpu_info *) info, &backend); - if (errcode) - return errcode; - - *out_backend = (struct kbase_hwcnt_backend *)backend; - - return 0; -} - -/* GPU backend implementation of kbase_hwcnt_backend_term_fn */ -static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend) -{ - if (!backend) - return; - - kbasep_hwcnt_backend_gpu_dump_disable(backend); - kbasep_hwcnt_backend_gpu_destroy( - (struct kbase_hwcnt_backend_gpu *)backend); -} - -/** - * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info. - * @info: Pointer to info to destroy. - * - * Can be safely called on a backend info in any state of partial construction. - */ -static void kbasep_hwcnt_backend_gpu_info_destroy( - const struct kbase_hwcnt_backend_gpu_info *info) -{ - if (!info) - return; - - kbase_hwcnt_gpu_metadata_destroy(info->metadata); - kfree(info); -} - -/** - * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info. - * @kbdev: Non_NULL pointer to kbase device. - * @out_info: Non-NULL pointer to where info is stored on success. - * - * Return 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_info_create( - struct kbase_device *kbdev, - const struct kbase_hwcnt_backend_gpu_info **out_info) -{ - int errcode = -ENOMEM; - struct kbase_hwcnt_gpu_info hwcnt_gpu_info; - struct kbase_hwcnt_backend_gpu_info *info = NULL; - - WARN_ON(!kbdev); - WARN_ON(!out_info); - - errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); - if (errcode) - return errcode; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto error; - - info->kbdev = kbdev; - -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY - info->use_secondary = true; -#else - info->use_secondary = false; -#endif - - errcode = kbase_hwcnt_gpu_metadata_create( - &hwcnt_gpu_info, info->use_secondary, - &info->metadata, - &info->dump_bytes); - if (errcode) - goto error; - - *out_info = info; - - return 0; -error: - kbasep_hwcnt_backend_gpu_info_destroy(info); - return errcode; -} - -int kbase_hwcnt_backend_gpu_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_interface *iface) -{ - int errcode; - const struct kbase_hwcnt_backend_gpu_info *info = NULL; - - if (!kbdev || !iface) - return -EINVAL; - - errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info); - - if (errcode) - return errcode; - - iface->metadata = info->metadata; - iface->info = (struct kbase_hwcnt_backend_info *)info; - iface->init = kbasep_hwcnt_backend_gpu_init; - iface->term = kbasep_hwcnt_backend_gpu_term; - iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns; - iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable; - iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock; - iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable; - iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear; - iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request; - iface->dump_wait = 
kbasep_hwcnt_backend_gpu_dump_wait; - iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get; - - return 0; -} - -void kbase_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_interface *iface) -{ - if (!iface) - return; - - kbasep_hwcnt_backend_gpu_info_destroy( - (const struct kbase_hwcnt_backend_gpu_info *)iface->info); - memset(iface, 0, sizeof(*iface)); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend_gpu.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend_gpu.h deleted file mode 100644 index 7712f1424a8b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_backend_gpu.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU - * backend. - */ - -#ifndef _KBASE_HWCNT_BACKEND_GPU_H_ -#define _KBASE_HWCNT_BACKEND_GPU_H_ - -#include "mali_kbase_hwcnt_backend.h" - -struct kbase_device; - -/** - * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend - * interface. - * @kbdev: Non-NULL pointer to kbase device. - * @iface: Non-NULL pointer to backend interface structure that is filled in - * on creation success. - * - * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_backend_gpu_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_interface *iface); - -/** - * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend - * interface. - * @iface: Pointer to interface to destroy. - * - * Can be safely called on an all-zeroed interface, or on an already destroyed - * interface. - */ -void kbase_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_interface *iface); - -#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_context.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_context.h deleted file mode 100644 index bc50ad12c2f4..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_context.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Hardware counter context API. - */ - -#ifndef _KBASE_HWCNT_CONTEXT_H_ -#define _KBASE_HWCNT_CONTEXT_H_ - -#include - -struct kbase_hwcnt_backend_interface; -struct kbase_hwcnt_context; - -/** - * kbase_hwcnt_context_init() - Initialise a hardware counter context. - * @iface: Non-NULL pointer to a hardware counter backend interface. - * @out_hctx: Non-NULL pointer to where the pointer to the created context will - * be stored on success. - * - * On creation, the disable count of the context will be 0. - * A hardware counter accumulator can be acquired using a created context. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_context_init( - const struct kbase_hwcnt_backend_interface *iface, - struct kbase_hwcnt_context **out_hctx); - -/** - * kbase_hwcnt_context_term() - Terminate a hardware counter context. - * @hctx: Pointer to context to be terminated. - */ -void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); - -/** - * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by - * the context, so related counter data - * structures can be created. - * @hctx: Non-NULL pointer to the hardware counter context. - * - * Return: Non-NULL pointer to metadata, or NULL on error. - */ -const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( - struct kbase_hwcnt_context *hctx); - -/** - * kbase_hwcnt_context_disable() - Increment the disable count of the context. - * @hctx: Pointer to the hardware counter context. - * - * If a call to this function increments the disable count from 0 to 1, and - * an accumulator has been acquired, then a counter dump will be performed - * before counters are disabled via the backend interface. - * - * Subsequent dumps via the accumulator while counters are disabled will first - * return the accumulated dump, then will return dumps with zeroed counters. - * - * After this function call returns, it is guaranteed that counters will not be - * enabled via the backend interface. - */ -void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); - -/** - * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the - * context if possible in an atomic - * context. - * @hctx: Pointer to the hardware counter context. - * - * This function will only succeed if hardware counters are effectively already - * disabled, i.e. there is no accumulator, the disable count is already - * non-zero, or the accumulator has no counters set. - * - * After this function call returns true, it is guaranteed that counters will - * not be enabled via the backend interface. - * - * Return: True if the disable count was incremented, else False. - */ -bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); - -/** - * kbase_hwcnt_context_enable() - Decrement the disable count of the context. - * @hctx: Pointer to the hardware counter context. - * - * If a call to this function decrements the disable count from 1 to 0, and - * an accumulator has been acquired, then counters will be re-enabled via the - * backend interface. - * - * If an accumulator has been acquired and enabling counters fails for some - * reason, the accumulator will be placed into an error state. 
- * - * It is only valid to call this function one time for each prior returned call - * to kbase_hwcnt_context_disable. - * - * The spinlock documented in the backend interface that was passed in to - * kbase_hwcnt_context_init() must be held before calling this function. - */ -void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); - -#endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_gpu.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_gpu.c deleted file mode 100644 index 647d3ecdf100..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_gpu.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" -#ifdef CONFIG_MALI_NO_MALI -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif - -#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 -#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 -#define KBASE_HWCNT_V4_MAX_GROUPS \ - (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) -#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 -#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ - (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) -/* Index of the PRFCNT_EN header into a V4 counter block */ -#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 - -#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 -#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 -#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ - (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) -/* Index of the PRFCNT_EN header into a V5 counter block */ -#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 - -/** - * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter - * metadata for a v4 GPU. - * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. - * @metadata: Non-NULL pointer to where created metadata is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_metadata_v4_create( - const struct kbase_hwcnt_gpu_v4_info *v4_info, - const struct kbase_hwcnt_metadata **metadata) -{ - size_t grp; - int errcode = -ENOMEM; - struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description *grps; - size_t avail_mask_bit; - - WARN_ON(!v4_info); - WARN_ON(!metadata); - - /* Check if there are enough bits in the availability mask to represent - * all the hardware counter blocks in the system. 
- */ - if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) - return -EINVAL; - - grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); - if (!grps) - goto clean_up; - - desc.grp_cnt = v4_info->cg_count; - desc.grps = grps; - - for (grp = 0; grp < v4_info->cg_count; grp++) { - size_t blk; - size_t sc; - const u64 core_mask = v4_info->cgs[grp].core_mask; - struct kbase_hwcnt_block_description *blks = kcalloc( - KBASE_HWCNT_V4_BLOCKS_PER_GROUP, - sizeof(*blks), - GFP_KERNEL); - - if (!blks) - goto clean_up; - - grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; - grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; - grps[grp].blks = blks; - - for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { - blks[blk].inst_cnt = 1; - blks[blk].hdr_cnt = - KBASE_HWCNT_V4_HEADERS_PER_BLOCK; - blks[blk].ctr_cnt = - KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; - } - - for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { - blks[sc].type = core_mask & (1ull << sc) ? - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; - } - - blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; - blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; - blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; - blks[7].type = (grp == 0) ? - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; - - WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); - } - - /* Initialise the availability mask */ - desc.avail_mask = 0; - avail_mask_bit = 0; - - for (grp = 0; grp < desc.grp_cnt; grp++) { - size_t blk; - const struct kbase_hwcnt_block_description *blks = - desc.grps[grp].blks; - for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { - WARN_ON(blks[blk].inst_cnt != 1); - if (blks[blk].type != - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) - desc.avail_mask |= (1ull << avail_mask_bit); - - avail_mask_bit++; - } - } - - errcode = kbase_hwcnt_metadata_create(&desc, metadata); - - /* Always clean up, as metadata will make a copy of the input args */ -clean_up: - if (grps) { - for (grp = 0; grp < v4_info->cg_count; grp++) - kfree(grps[grp].blks); - kfree(grps); - } - return errcode; -} - -/** - * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a - * V4 GPU. - * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. - * - * Return: Size of buffer the V4 GPU needs to perform a counter dump. - */ -static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( - const struct kbase_hwcnt_gpu_v4_info *v4_info) -{ - return v4_info->cg_count * - KBASE_HWCNT_V4_BLOCKS_PER_GROUP * - KBASE_HWCNT_V4_VALUES_PER_BLOCK * - KBASE_HWCNT_VALUE_BYTES; -} - -/** - * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter - * metadata for a v5 GPU. - * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. - * @use_secondary: True if secondary performance counters should be used, else - * false. Ignored if secondary counters are not supported. - * @metadata: Non-NULL pointer to where created metadata is stored - * on success. - * - * Return: 0 on success, else error code. 
- */ -static int kbasep_hwcnt_backend_gpu_metadata_v5_create( - const struct kbase_hwcnt_gpu_v5_info *v5_info, - bool use_secondary, - const struct kbase_hwcnt_metadata **metadata) -{ - struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description - blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; - size_t non_sc_block_count; - size_t sc_block_count; - - WARN_ON(!v5_info); - WARN_ON(!metadata); - - /* Calculate number of block instances that aren't shader cores */ - non_sc_block_count = 2 + v5_info->l2_count; - /* Calculate number of block instances that are shader cores */ - sc_block_count = fls64(v5_info->core_mask); - - /* - * A system can have up to 64 shader cores, but the 64-bit - * availability mask can't physically represent that many cores as well - * as the other hardware blocks. - * Error out if there are more blocks than our implementation can - * support. - */ - if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) - return -EINVAL; - - /* One Job Manager block */ - blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; - blks[0].inst_cnt = 1; - blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; - - /* One Tiler block */ - blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; - blks[1].inst_cnt = 1; - blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; - - /* l2_count memsys blks */ - blks[2].type = use_secondary ? - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; - blks[2].inst_cnt = v5_info->l2_count; - blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; - - /* - * There are as many shader cores in the system as there are bits set in - * the core mask. However, the dump buffer memory requirements need to - * take into account the fact that the core mask may be non-contiguous. - * - * For example, a system with a core mask of 0b1011 has the same dump - * buffer memory requirements as a system with 0b1111, but requires more - * memory than a system with 0b0111. However, core 2 of the system with - * 0b1011 doesn't physically exist, and the dump buffer memory that - * accounts for that core will never be written to when we do a counter - * dump. - * - * We find the core mask's last set bit to determine the memory - * requirements, and embed the core mask into the availability mask so - * we can determine later which shader cores physically exist. - */ - blks[3].type = use_secondary ? - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; - blks[3].inst_cnt = sc_block_count; - blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; - - WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); - - group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; - group.blks = blks; - - desc.grp_cnt = 1; - desc.grps = &group; - - /* The JM, Tiler, and L2s are always available, and are before cores */ - desc.avail_mask = (1ull << non_sc_block_count) - 1; - /* Embed the core mask directly in the availability mask */ - desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); - - return kbase_hwcnt_metadata_create(&desc, metadata); -} - -/** - * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a - * V5 GPU. - * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. 
- * - * Return: Size of buffer the V5 GPU needs to perform a counter dump. - */ -static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes( - const struct kbase_hwcnt_gpu_v5_info *v5_info) -{ - WARN_ON(!v5_info); - return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) * - KBASE_HWCNT_V5_VALUES_PER_BLOCK * - KBASE_HWCNT_VALUE_BYTES; -} - -int kbase_hwcnt_gpu_info_init( - struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info) -{ - if (!kbdev || !info) - return -EINVAL; - -#ifdef CONFIG_MALI_NO_MALI - /* NO_MALI uses V5 layout, regardless of the underlying platform. */ - info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; -#else - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { - info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; - info->v4.cg_count = kbdev->gpu_props.num_core_groups; - info->v4.cgs = kbdev->gpu_props.props.coherency_info.group; - } else { - const struct base_gpu_props *props = &kbdev->gpu_props.props; - const size_t l2_count = props->l2_props.num_l2_slices; - const size_t core_mask = - props->coherency_info.group[0].core_mask; - - info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - info->v5.l2_count = l2_count; - info->v5.core_mask = core_mask; - } -#endif - return 0; -} - -int kbase_hwcnt_gpu_metadata_create( - const struct kbase_hwcnt_gpu_info *info, - bool use_secondary, - const struct kbase_hwcnt_metadata **out_metadata, - size_t *out_dump_bytes) -{ - int errcode; - const struct kbase_hwcnt_metadata *metadata; - size_t dump_bytes; - - if (!info || !out_metadata || !out_dump_bytes) - return -EINVAL; - - switch (info->type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4); - errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create( - &info->v4, &metadata); - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5); - errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create( - &info->v5, use_secondary, &metadata); - break; - default: - return -EINVAL; - } - if (errcode) - return errcode; - - /* - * Dump abstraction size should be exactly the same size and layout as - * the physical dump size, for backwards compatibility. 
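
As a worked example of the V5 sizing and availability-mask rules spelled out in the comments above, the following standalone sketch reproduces the same arithmetic with local helpers only. The 64-values-per-block figure is an assumption standing in for KBASE_HWCNT_V5_VALUES_PER_BLOCK, and fls64_sketch() is a local stand-in for the kernel's fls64(); neither is taken from this patch.

#include <stdint.h>
#include <stdio.h>

#define VALUES_PER_BLOCK 64u /* assumed stand-in for KBASE_HWCNT_V5_VALUES_PER_BLOCK */
#define VALUE_BYTES      4u  /* sizeof(u32), as KBASE_HWCNT_VALUE_BYTES */

/* Position of the last set bit, counting from 1; local stand-in for fls64() */
static unsigned int fls64_sketch(uint64_t v)
{
	unsigned int n = 0;

	while (v) {
		v >>= 1;
		n++;
	}
	return n;
}

int main(void)
{
	const uint64_t core_mask = 0xB;            /* sparse mask 0b1011: core 2 absent */
	const size_t l2_count = 2;
	const size_t non_sc = 2 + l2_count;        /* JM + Tiler + L2 slices */
	const size_t sc = fls64_sketch(core_mask); /* 4: the missing core is still padded */
	const uint64_t avail = ((1ull << non_sc) - 1) | (core_mask << non_sc);
	const size_t dump_bytes = (non_sc + sc) * VALUES_PER_BLOCK * VALUE_BYTES;

	/* Prints: blocks=8 avail_mask=0xbf dump_bytes=2048 */
	printf("blocks=%zu avail_mask=0x%llx dump_bytes=%zu\n",
	       non_sc + sc, (unsigned long long)avail, dump_bytes);
	return 0;
}

Under the same assumptions, a contiguous 0b0111 mask would give sc = 3 and a 1792-byte buffer, which is exactly the contrast the comment above draws between 0b1011 and 0b0111.
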
- */ - WARN_ON(dump_bytes != metadata->dump_buf_bytes); - - *out_metadata = metadata; - *out_dump_bytes = dump_bytes; - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); - -void kbase_hwcnt_gpu_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata) -{ - if (!metadata) - return; - - kbase_hwcnt_metadata_destroy(metadata); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); - -int kbase_hwcnt_gpu_dump_get( - struct kbase_hwcnt_dump_buffer *dst, - void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) -{ - const struct kbase_hwcnt_metadata *metadata; - const u32 *dump_src; - size_t src_offset, grp, blk, blk_inst; - - if (!dst || !src || !dst_enable_map || - (dst_enable_map->metadata != dst->metadata)) - return -EINVAL; - - metadata = dst->metadata; - dump_src = (const u32 *)src; - src_offset = 0; - - kbase_hwcnt_metadata_for_each_block( - metadata, grp, blk, blk_inst) { - const size_t hdr_cnt = - kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); - const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count( - metadata, grp, blk); - - /* Early out if no values in the dest block are enabled */ - if (kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) { - u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u32 *src_blk = dump_src + src_offset; - - if (accumulate) { - kbase_hwcnt_dump_buffer_block_accumulate( - dst_blk, src_blk, hdr_cnt, ctr_cnt); - } else { - kbase_hwcnt_dump_buffer_block_copy( - dst_blk, src_blk, (hdr_cnt + ctr_cnt)); - } - } - - src_offset += (hdr_cnt + ctr_cnt); - } - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); - -/** - * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block - * enable map abstraction to - * a physical block enable - * map. - * @lo: Low 64 bits of block enable map abstraction. - * @hi: High 64 bits of block enable map abstraction. - * - * The abstraction uses 128 bits to enable 128 block values, whereas the - * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. - * Therefore, this conversion is lossy. - * - * Return: 32-bit physical block enable map. - */ -static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical( - u64 lo, - u64 hi) -{ - u32 phys = 0; - u64 dwords[2] = {lo, hi}; - size_t dword_idx; - - for (dword_idx = 0; dword_idx < 2; dword_idx++) { - const u64 dword = dwords[dword_idx]; - u16 packed = 0; - - size_t hword_bit; - - for (hword_bit = 0; hword_bit < 16; hword_bit++) { - const size_t dword_bit = hword_bit * 4; - const u16 mask = - ((dword >> (dword_bit + 0)) & 0x1) | - ((dword >> (dword_bit + 1)) & 0x1) | - ((dword >> (dword_bit + 2)) & 0x1) | - ((dword >> (dword_bit + 3)) & 0x1); - packed |= (mask << hword_bit); - } - phys |= ((u32)packed) << (16 * dword_idx); - } - return phys; -} - -/** - * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical - * block enable map to a - * block enable map - * abstraction. - * @phys: Physical 32-bit block enable map - * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction - * will be stored. - * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction - * will be stored. 
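
The lossiness described above is easiest to see with a round trip. The sketch below can be compiled on its own: block_map_to_physical() and block_map_from_physical() are local copies of the packing logic in the two helpers here, written with plain stdint types rather than the kernel's u32/u64, not the driver's own symbols.

#include <stdint.h>
#include <stdio.h>

/* Pack a 128-bit enable map into 32 physical bits: physical bit n covers
 * values [n*4, n*4+3], so any set bit in a group of four sets that bit.
 */
static uint32_t block_map_to_physical(uint64_t lo, uint64_t hi)
{
	const uint64_t dwords[2] = { lo, hi };
	uint32_t phys = 0;
	size_t d, b;

	for (d = 0; d < 2; d++)
		for (b = 0; b < 16; b++)
			if ((dwords[d] >> (b * 4)) & 0xF)
				phys |= 1u << (d * 16 + b);
	return phys;
}

/* Expand back: each physical bit re-enables all four values it covers. */
static void block_map_from_physical(uint32_t phys, uint64_t *lo, uint64_t *hi)
{
	uint64_t dwords[2] = { 0, 0 };
	size_t d, b;

	for (d = 0; d < 2; d++)
		for (b = 0; b < 16; b++)
			if ((phys >> (d * 16 + b)) & 0x1)
				dwords[d] |= 0xFull << (b * 4);
	*lo = dwords[0];
	*hi = dwords[1];
}

int main(void)
{
	uint64_t lo = 0x1, hi = 0; /* only counter 0 enabled */
	uint32_t phys = block_map_to_physical(lo, hi);

	block_map_from_physical(phys, &lo, &hi);
	/* Lossy: counter 0 alone comes back as counters 0-3.
	 * Prints: phys=0x1 lo=0xf hi=0x0
	 */
	printf("phys=0x%x lo=0x%llx hi=0x%llx\n",
	       phys, (unsigned long long)lo, (unsigned long long)hi);
	return 0;
}
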
- */ -static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( - u32 phys, - u64 *lo, - u64 *hi) -{ - u64 dwords[2] = {0, 0}; - - size_t dword_idx; - - for (dword_idx = 0; dword_idx < 2; dword_idx++) { - const u16 packed = phys >> (16 * dword_idx); - u64 dword = 0; - - size_t hword_bit; - - for (hword_bit = 0; hword_bit < 16; hword_bit++) { - const size_t dword_bit = hword_bit * 4; - const u64 mask = (packed >> (hword_bit)) & 0x1; - - dword |= mask << (dword_bit + 0); - dword |= mask << (dword_bit + 1); - dword |= mask << (dword_bit + 2); - dword |= mask << (dword_bit + 3); - } - dwords[dword_idx] = dword; - } - *lo = dwords[0]; - *hi = dwords[1]; -} - -void kbase_hwcnt_gpu_enable_map_to_physical( - struct kbase_hwcnt_physical_enable_map *dst, - const struct kbase_hwcnt_enable_map *src) -{ - const struct kbase_hwcnt_metadata *metadata; - - u64 jm_bm = 0; - u64 shader_bm = 0; - u64 tiler_bm = 0; - u64 mmu_l2_bm = 0; - - size_t grp, blk, blk_inst; - - if (WARN_ON(!src) || WARN_ON(!dst)) - return; - - metadata = src->metadata; - - kbase_hwcnt_metadata_for_each_block( - metadata, grp, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type( - metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type( - metadata, grp, blk); - const size_t blk_val_cnt = - kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); - const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( - src, grp, blk, blk_inst); - - switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); - switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: - shader_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: - tiler_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: - mmu_l2_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - jm_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: - break; - default: - WARN_ON(true); - } - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); - switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - jm_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - tiler_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - shader_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - mmu_l2_bm |= *blk_map; - break; - default: - WARN_ON(true); - } - break; - default: - WARN_ON(true); - } - } - - dst->jm_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); - dst->shader_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); - dst->tiler_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0); - dst->mmu_l2_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); - -void kbase_hwcnt_gpu_enable_map_from_physical( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_physical_enable_map *src) -{ - const struct kbase_hwcnt_metadata *metadata; - - u64 ignored_hi; - u64 jm_bm; - u64 shader_bm; - u64 tiler_bm; - u64 mmu_l2_bm; - size_t grp, blk, blk_inst; - - if (WARN_ON(!src) || WARN_ON(!dst)) - return; - - metadata = dst->metadata; - - 
kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->jm_bm, &jm_bm, &ignored_hi); - kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->shader_bm, &shader_bm, &ignored_hi); - kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->tiler_bm, &tiler_bm, &ignored_hi); - kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi); - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - const u64 grp_type = kbase_hwcnt_metadata_group_type( - metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type( - metadata, grp, blk); - const size_t blk_val_cnt = - kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); - u64 *blk_map = kbase_hwcnt_enable_map_block_instance( - dst, grp, blk, blk_inst); - - switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); - switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: - *blk_map = shader_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: - *blk_map = tiler_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: - *blk_map = mmu_l2_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - *blk_map = jm_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: - break; - default: - WARN_ON(true); - } - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); - switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - *blk_map = jm_bm; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - *blk_map = tiler_bm; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - *blk_map = shader_bm; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - *blk_map = mmu_l2_bm; - break; - default: - WARN_ON(true); - } - break; - default: - WARN_ON(true); - } - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); - -void kbase_hwcnt_gpu_patch_dump_headers( - struct kbase_hwcnt_dump_buffer *buf, - const struct kbase_hwcnt_enable_map *enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; - - if (WARN_ON(!buf) || WARN_ON(!enable_map) || - WARN_ON(buf->metadata != enable_map->metadata)) - return; - - metadata = buf->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - const u64 grp_type = - kbase_hwcnt_metadata_group_type(metadata, grp); - u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( - buf, grp, blk, blk_inst); - const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( - enable_map, grp, blk, blk_inst); - const u32 prfcnt_en = - kbasep_hwcnt_backend_gpu_block_map_to_physical( - blk_map[0], 0); - - switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; - break; - default: - WARN_ON(true); - } - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_gpu.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_gpu.h deleted file mode 100644 index 509608a3d9b1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_gpu.h +++ /dev/null @@ -1,249 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM 
Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_HWCNT_GPU_H_ -#define _KBASE_HWCNT_GPU_H_ - -#include - -struct kbase_device; -struct kbase_hwcnt_metadata; -struct kbase_hwcnt_enable_map; -struct kbase_hwcnt_dump_buffer; - -/** - * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to - * identify metadata groups. - * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type. - * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. - */ -enum kbase_hwcnt_gpu_group_type { - KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10, - KBASE_HWCNT_GPU_GROUP_TYPE_V5, -}; - -/** - * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types, - * used to identify metadata blocks. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block. - */ -enum kbase_hwcnt_gpu_v4_block_type { - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED, -}; - -/** - * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, - * used to identify metadata blocks. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. - */ -enum kbase_hwcnt_gpu_v5_block_type { - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, -}; - -/** - * struct kbase_hwcnt_physical_enable_map - Representation of enable map - * directly used by GPU. - * @jm_bm: Job Manager counters selection bitmask. - * @shader_bm: Shader counters selection bitmask. - * @tiler_bm: Tiler counters selection bitmask. - * @mmu_l2_bm: MMU_L2 counters selection bitmask. - */ -struct kbase_hwcnt_physical_enable_map { - u32 jm_bm; - u32 shader_bm; - u32 tiler_bm; - u32 mmu_l2_bm; -}; - -/** - * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. - * @cg_count: Core group count. - * @cgs: Non-NULL pointer to array of cg_count coherent group structures. 
- * - * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, - * where each core group may have a physically different layout. - */ -struct kbase_hwcnt_gpu_v4_info { - size_t cg_count; - const struct mali_base_gpu_coherent_group *cgs; -}; - -/** - * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. - * @l2_count: L2 cache count. - * @core_mask: Shader core mask. May be sparse. - */ -struct kbase_hwcnt_gpu_v5_info { - size_t l2_count; - u64 core_mask; -}; - -/** - * struct kbase_hwcnt_gpu_info - Tagged union with information about the current - * GPU's hwcnt blocks. - * @type: GPU type. - * @v4: Info filled in if a v4 GPU. - * @v5: Info filled in if a v5 GPU. - */ -struct kbase_hwcnt_gpu_info { - enum kbase_hwcnt_gpu_group_type type; - union { - struct kbase_hwcnt_gpu_v4_info v4; - struct kbase_hwcnt_gpu_v5_info v5; - }; -}; - -/** - * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the - * hwcnt metadata. - * @kbdev: Non-NULL pointer to kbase device. - * @info: Non-NULL pointer to data structure to be filled in. - * - * The initialised info struct will only be valid for use while kbdev is valid. - */ -int kbase_hwcnt_gpu_info_init( - struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info); - -/** - * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the - * current GPU. - * @info: Non-NULL pointer to info struct initialised by - * kbase_hwcnt_gpu_info_init. - * @use_secondary: True if secondary performance counters should be used, else - * false. Ignored if secondary counters are not supported. - * @out_metadata: Non-NULL pointer to where created metadata is stored on - * success. - * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump - * buffer is stored on success. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_gpu_metadata_create( - const struct kbase_hwcnt_gpu_info *info, - bool use_secondary, - const struct kbase_hwcnt_metadata **out_metadata, - size_t *out_dump_bytes); - -/** - * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata. - * @metadata: Pointer to metadata to destroy. - */ -void kbase_hwcnt_gpu_metadata_destroy( - const struct kbase_hwcnt_metadata *metadata); - -/** - * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw - * dump buffer in src into the dump buffer - * abstraction in dst. - * @dst: Non-NULL pointer to dst dump buffer. - * @src: Non-NULL pointer to src raw dump buffer, of same length - * as returned in out_dump_bytes parameter of - * kbase_hwcnt_gpu_metadata_create. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @accumulate: True if counters in src should be accumulated into dst, - * rather than copied. - * - * The dst and dst_enable_map MUST have been created from the same metadata as - * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get - * the length of src. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_gpu_dump_get( - struct kbase_hwcnt_dump_buffer *dst, - void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate); - -/** - * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction - * into a physical enable map. - * @dst: Non-NULL pointer to dst physical enable map. - * @src: Non-NULL pointer to src enable map abstraction. 
- * - * The src must have been created from a metadata returned from a call to - * kbase_hwcnt_gpu_metadata_create. - * - * This is a lossy conversion, as the enable map abstraction has one bit per - * individual counter block value, but the physical enable map uses 1 bit for - * every 4 counters, shared over all instances of a block. - */ -void kbase_hwcnt_gpu_enable_map_to_physical( - struct kbase_hwcnt_physical_enable_map *dst, - const struct kbase_hwcnt_enable_map *src); - -/** - * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to - * an enable map abstraction. - * @dst: Non-NULL pointer to dst enable map abstraction. - * @src: Non-NULL pointer to src physical enable map. - * - * The dst must have been created from a metadata returned from a call to - * kbase_hwcnt_gpu_metadata_create. - * - * This is a lossy conversion, as the physical enable map can technically - * support counter blocks with 128 counters each, but no hardware actually uses - * more than 64, so the enable map abstraction has nowhere to store the enable - * information for the 64 non-existent counters. - */ -void kbase_hwcnt_gpu_enable_map_from_physical( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_physical_enable_map *src); - -/** - * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter - * enable headers in a dump buffer to - * reflect the specified enable map. - * @buf: Non-NULL pointer to dump buffer to patch. - * @enable_map: Non-NULL pointer to enable map. - * - * The buf and enable_map must have been created from a metadata returned from - * a call to kbase_hwcnt_gpu_metadata_create. - * - * This function should be used before handing off a dump buffer over the - * kernel-user boundary, to ensure the header is accurate for the enable map - * used by the user. - */ -void kbase_hwcnt_gpu_patch_dump_headers( - struct kbase_hwcnt_dump_buffer *buf, - const struct kbase_hwcnt_enable_map *enable_map); - -#endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_legacy.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_legacy.c deleted file mode 100644 index b0e6aee1b135..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_legacy.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_legacy.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" - -#include -#include - -/** - * struct kbase_hwcnt_legacy_client - Legacy hardware counter client. - * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned. - * @enable_map: Counter enable map. 
- * @dump_buf: Dump buffer used to manipulate dumps before copied to user. - * @hvcli: Hardware counter virtualizer client. - */ -struct kbase_hwcnt_legacy_client { - void __user *user_dump_buf; - struct kbase_hwcnt_enable_map enable_map; - struct kbase_hwcnt_dump_buffer dump_buf; - struct kbase_hwcnt_virtualizer_client *hvcli; -}; - -int kbase_hwcnt_legacy_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_ioctl_hwcnt_enable *enable, - struct kbase_hwcnt_legacy_client **out_hlcli) -{ - int errcode; - struct kbase_hwcnt_legacy_client *hlcli; - const struct kbase_hwcnt_metadata *metadata; - struct kbase_hwcnt_physical_enable_map phys_em; - - if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli) - return -EINVAL; - - metadata = kbase_hwcnt_virtualizer_metadata(hvirt); - - hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL); - if (!hlcli) - return -ENOMEM; - - hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer; - - errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map); - if (errcode) - goto error; - - /* Translate from the ioctl enable map to the internal one */ - phys_em.jm_bm = enable->jm_bm; - phys_em.shader_bm = enable->shader_bm; - phys_em.tiler_bm = enable->tiler_bm; - phys_em.mmu_l2_bm = enable->mmu_l2_bm; - kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em); - - errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf); - if (errcode) - goto error; - - errcode = kbase_hwcnt_virtualizer_client_create( - hvirt, &hlcli->enable_map, &hlcli->hvcli); - if (errcode) - goto error; - - *out_hlcli = hlcli; - return 0; - -error: - kbase_hwcnt_legacy_client_destroy(hlcli); - return errcode; -} - -void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli) -{ - if (!hlcli) - return; - - kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli); - kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf); - kbase_hwcnt_enable_map_free(&hlcli->enable_map); - kfree(hlcli); -} - -int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli) -{ - int errcode; - u64 ts_start_ns; - u64 ts_end_ns; - - if (!hlcli) - return -EINVAL; - - /* Dump into the kernel buffer */ - errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, - &ts_start_ns, &ts_end_ns, &hlcli->dump_buf); - if (errcode) - return errcode; - - /* Patch the dump buf headers, to hide the counters that other hwcnt - * clients are using. 
- */ - kbase_hwcnt_gpu_patch_dump_headers( - &hlcli->dump_buf, &hlcli->enable_map); - - /* Zero all non-enabled counters (current values are undefined) */ - kbase_hwcnt_dump_buffer_zero_non_enabled( - &hlcli->dump_buf, &hlcli->enable_map); - - /* Copy into the user's buffer */ - errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf, - hlcli->dump_buf.metadata->dump_buf_bytes); - /* Non-zero errcode implies user buf was invalid or too small */ - if (errcode) - return -EFAULT; - - return 0; -} - -int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli) -{ - u64 ts_start_ns; - u64 ts_end_ns; - - if (!hlcli) - return -EINVAL; - - /* Dump with a NULL buffer to clear this client's counters */ - return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, - &ts_start_ns, &ts_end_ns, NULL); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_legacy.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_legacy.h deleted file mode 100644 index 7a610ae378a2..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_legacy.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Legacy hardware counter interface, giving userspace clients simple, - * synchronous access to hardware counters. - * - * Any functions operating on an single legacy hardware counter client instance - * must be externally synchronised. - * Different clients may safely be used concurrently. - */ - -#ifndef _KBASE_HWCNT_LEGACY_H_ -#define _KBASE_HWCNT_LEGACY_H_ - -struct kbase_hwcnt_legacy_client; -struct kbase_ioctl_hwcnt_enable; -struct kbase_hwcnt_virtualizer; - -/** - * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client. - * @hvirt: Non-NULL pointer to hardware counter virtualizer the client - * should be attached to. - * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid - * pointer to a user dump buffer large enough to hold a dump, and - * the counters that should be enabled. - * @out_hlcli: Non-NULL pointer to where the pointer to the created client will - * be stored on success. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_legacy_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_ioctl_hwcnt_enable *enable, - struct kbase_hwcnt_legacy_client **out_hlcli); - -/** - * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter - * client. - * @hlcli: Pointer to the legacy hardware counter client. - * - * Will safely destroy a client in any partial state of construction. - */ -void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli); - -/** - * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the - * client's user buffer. 
- * @hlcli: Non-NULL pointer to the legacy hardware counter client. - * - * This function will synchronously dump hardware counters into the user buffer - * specified on client creation, with the counters specified on client creation. - * - * The counters are automatically cleared after each dump, such that the next - * dump performed will return the counter values accumulated between the time of - * this function call and the next dump. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli); - -/** - * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter - * dump. - * @hlcli: Non-NULL pointer to the legacy hardware counter client. - * - * This function will synchronously clear the hardware counters, such that the - * next dump performed will return the counter values accumulated between the - * time of this function call and the next dump. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli); - -#endif /* _KBASE_HWCNT_LEGACY_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_reader.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_reader.h deleted file mode 100755 index 10706b8d2548..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_reader.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_HWCNT_READER_H_ -#define _KBASE_HWCNT_READER_H_ - -/* The ids of ioctl commands. 
*/ -#define KBASE_HWCNT_READER 0xBE -#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) -#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) -#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) -#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) -#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ - struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ - struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) -#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) -#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) -#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) - -/** - * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata - * @timestamp: time when sample was collected - * @event_id: id of an event that triggered sample collection - * @buffer_idx: position in sampling area where sample buffer was stored - */ -struct kbase_hwcnt_reader_metadata { - u64 timestamp; - u32 event_id; - u32 buffer_idx; -}; - -/** - * enum base_hwcnt_reader_event - hwcnt dumping events - * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump - * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump - * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request - * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request - * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events - */ -enum base_hwcnt_reader_event { - BASE_HWCNT_READER_EVENT_MANUAL, - BASE_HWCNT_READER_EVENT_PERIODIC, - BASE_HWCNT_READER_EVENT_PREJOB, - BASE_HWCNT_READER_EVENT_POSTJOB, - - BASE_HWCNT_READER_EVENT_COUNT -}; - -#endif /* _KBASE_HWCNT_READER_H_ */ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_types.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_types.c deleted file mode 100644 index 1e9efde97c59..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_types.c +++ /dev/null @@ -1,538 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" - -/* Minimum alignment of each block of hardware counters */ -#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ - (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) - -/** - * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. - * @value: The value to align upwards. - * @alignment: The alignment. - * - * Return: A number greater than or equal to value that is aligned to alignment. 
- */ -#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ - (value + ((alignment - (value % alignment)) % alignment)) - -int kbase_hwcnt_metadata_create( - const struct kbase_hwcnt_description *desc, - const struct kbase_hwcnt_metadata **out_metadata) -{ - char *buf; - struct kbase_hwcnt_metadata *metadata; - struct kbase_hwcnt_group_metadata *grp_mds; - size_t grp; - size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ - size_t dump_buf_count; /* Number of u32 values (inc padding) */ - size_t avail_mask_bits; /* Number of availability mask bits */ - - size_t size; - size_t offset; - - if (!desc || !out_metadata) - return -EINVAL; - - /* Calculate the bytes needed to tightly pack the metadata */ - - /* Top level metadata */ - size = 0; - size += sizeof(struct kbase_hwcnt_metadata); - - /* Group metadata */ - size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; - - /* Block metadata */ - for (grp = 0; grp < desc->grp_cnt; grp++) { - size += sizeof(struct kbase_hwcnt_block_metadata) * - desc->grps[grp].blk_cnt; - } - - /* Single allocation for the entire metadata */ - buf = kmalloc(size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - /* Use the allocated memory for the metadata and its members */ - - /* Bump allocate the top level metadata */ - offset = 0; - metadata = (struct kbase_hwcnt_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_metadata); - - /* Bump allocate the group metadata */ - grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; - - enable_map_count = 0; - dump_buf_count = 0; - avail_mask_bits = 0; - - for (grp = 0; grp < desc->grp_cnt; grp++) { - size_t blk; - - const struct kbase_hwcnt_group_description *grp_desc = - desc->grps + grp; - struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; - - size_t group_enable_map_count = 0; - size_t group_dump_buffer_count = 0; - size_t group_avail_mask_bits = 0; - - /* Bump allocate this group's block metadata */ - struct kbase_hwcnt_block_metadata *blk_mds = - (struct kbase_hwcnt_block_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_block_metadata) * - grp_desc->blk_cnt; - - /* Fill in each block in the group's information */ - for (blk = 0; blk < grp_desc->blk_cnt; blk++) { - const struct kbase_hwcnt_block_description *blk_desc = - grp_desc->blks + blk; - struct kbase_hwcnt_block_metadata *blk_md = - blk_mds + blk; - const size_t n_values = - blk_desc->hdr_cnt + blk_desc->ctr_cnt; - - blk_md->type = blk_desc->type; - blk_md->inst_cnt = blk_desc->inst_cnt; - blk_md->hdr_cnt = blk_desc->hdr_cnt; - blk_md->ctr_cnt = blk_desc->ctr_cnt; - blk_md->enable_map_index = group_enable_map_count; - blk_md->enable_map_stride = - kbase_hwcnt_bitfield_count(n_values); - blk_md->dump_buf_index = group_dump_buffer_count; - blk_md->dump_buf_stride = - KBASE_HWCNT_ALIGN_UPWARDS( - n_values, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); - blk_md->avail_mask_index = group_avail_mask_bits; - - group_enable_map_count += - blk_md->enable_map_stride * blk_md->inst_cnt; - group_dump_buffer_count += - blk_md->dump_buf_stride * blk_md->inst_cnt; - group_avail_mask_bits += blk_md->inst_cnt; - } - - /* Fill in the group's information */ - grp_md->type = grp_desc->type; - grp_md->blk_cnt = grp_desc->blk_cnt; - grp_md->blk_metadata = blk_mds; - grp_md->enable_map_index = enable_map_count; - grp_md->dump_buf_index = dump_buf_count; - grp_md->avail_mask_index = avail_mask_bits; - - enable_map_count 
+= group_enable_map_count; - dump_buf_count += group_dump_buffer_count; - avail_mask_bits += group_avail_mask_bits; - } - - /* Fill in the top level metadata's information */ - metadata->grp_cnt = desc->grp_cnt; - metadata->grp_metadata = grp_mds; - metadata->enable_map_bytes = - enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; - metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; - metadata->avail_mask = desc->avail_mask; - - WARN_ON(size != offset); - /* Due to the block alignment, there should be exactly one enable map - * bit per 4 bytes in the dump buffer. - */ - WARN_ON(metadata->dump_buf_bytes != - (metadata->enable_map_bytes * - BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); - - *out_metadata = metadata; - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); - -void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) -{ - kfree(metadata); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); - -int kbase_hwcnt_enable_map_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_enable_map *enable_map) -{ - u64 *enable_map_buf; - - if (!metadata || !enable_map) - return -EINVAL; - - enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); - if (!enable_map_buf) - return -ENOMEM; - - enable_map->metadata = metadata; - enable_map->enable_map = enable_map_buf; - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); - -void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) -{ - if (!enable_map) - return; - - kfree(enable_map->enable_map); - enable_map->enable_map = NULL; - enable_map->metadata = NULL; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); - -int kbase_hwcnt_dump_buffer_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - u32 *buf; - - if (!metadata || !dump_buf) - return -EINVAL; - - buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - dump_buf->metadata = metadata; - dump_buf->dump_buf = buf; - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); - -void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) -{ - if (!dump_buf) - return; - - kfree(dump_buf->dump_buf); - memset(dump_buf, 0, sizeof(*dump_buf)); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); - -int kbase_hwcnt_dump_buffer_array_alloc( - const struct kbase_hwcnt_metadata *metadata, - size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs) -{ - struct kbase_hwcnt_dump_buffer *buffers; - size_t buf_idx; - unsigned int order; - unsigned long addr; - - if (!metadata || !dump_bufs) - return -EINVAL; - - /* Allocate memory for the dump buffer struct array */ - buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); - if (!buffers) - return -ENOMEM; - - /* Allocate pages for the actual dump buffers, as they tend to be fairly - * large. 
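
A small worked example of the block padding used by kbase_hwcnt_metadata_create() above: blocks are padded to KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (a 64-bit bitfield times 4-byte values, i.e. 256 bytes, or 64 values), which is what keeps exactly one enable-map bit per 4 dump-buffer bytes. The macro below repeats the KBASE_HWCNT_ALIGN_UPWARDS arithmetic with argument parentheses added; the 70-value block is hypothetical.

#include <stdio.h>

/* Same arithmetic as KBASE_HWCNT_ALIGN_UPWARDS above, with args parenthesised */
#define ALIGN_UPWARDS(value, alignment) \
	((value) + (((alignment) - ((value) % (alignment))) % (alignment)))

int main(void)
{
	const unsigned int align = (64 * 4) / 4; /* block alignment in 4-byte values: 64 */

	printf("%u\n", ALIGN_UPWARDS(64u, align)); /* 64: already on the boundary */
	printf("%u\n", ALIGN_UPWARDS(70u, align)); /* 128: padded up to the next block */
	return 0;
}
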
- */ - order = get_order(metadata->dump_buf_bytes * n); - addr = __get_free_pages(GFP_KERNEL, order); - - if (!addr) { - kfree(buffers); - return -ENOMEM; - } - - dump_bufs->page_addr = addr; - dump_bufs->page_order = order; - dump_bufs->buf_cnt = n; - dump_bufs->bufs = buffers; - - /* Set the buffer of each dump buf */ - for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t offset = metadata->dump_buf_bytes * buf_idx; - - buffers[buf_idx].metadata = metadata; - buffers[buf_idx].dump_buf = (u32 *)(addr + offset); - } - - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); - -void kbase_hwcnt_dump_buffer_array_free( - struct kbase_hwcnt_dump_buffer_array *dump_bufs) -{ - if (!dump_bufs) - return; - - kfree(dump_bufs->bufs); - free_pages(dump_bufs->page_addr, dump_bufs->page_order); - memset(dump_bufs, 0, sizeof(*dump_bufs)); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); - -void kbase_hwcnt_dump_buffer_zero( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; - - if (WARN_ON(!dst) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst->metadata != dst_enable_map->metadata)) - return; - - metadata = dst->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u32 *dst_blk; - size_t val_cnt; - - if (!kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) - continue; - - dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); - - kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); - -void kbase_hwcnt_dump_buffer_zero_strict( - struct kbase_hwcnt_dump_buffer *dst) -{ - if (WARN_ON(!dst)) - return; - - memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); - -void kbase_hwcnt_dump_buffer_zero_non_enabled( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; - - if (WARN_ON(!dst) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst->metadata != dst_enable_map->metadata)) - return; - - metadata = dst->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); - - /* Align upwards to include padding bytes */ - val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); - - if (kbase_hwcnt_metadata_block_instance_avail( - metadata, grp, blk, blk_inst)) { - /* Block available, so only zero non-enabled values */ - kbase_hwcnt_dump_buffer_block_zero_non_enabled( - dst_blk, blk_em, val_cnt); - } else { - /* Block not available, so zero the entire thing */ - kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); - } - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); - -void kbase_hwcnt_dump_buffer_copy( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, 
blk_inst; - - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || - WARN_ON(dst->metadata != src->metadata) || - WARN_ON(dst->metadata != dst_enable_map->metadata)) - return; - - metadata = dst->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u32 *dst_blk; - const u32 *src_blk; - size_t val_cnt; - - if (!kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) - continue; - - dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); - - kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); - -void kbase_hwcnt_dump_buffer_copy_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; - - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || - WARN_ON(dst->metadata != src->metadata) || - WARN_ON(dst->metadata != dst_enable_map->metadata)) - return; - - metadata = dst->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); - /* Align upwards to include padding bytes */ - val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); - - kbase_hwcnt_dump_buffer_block_copy_strict( - dst_blk, src_blk, blk_em, val_cnt); - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); - -void kbase_hwcnt_dump_buffer_accumulate( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; - - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || - WARN_ON(dst->metadata != src->metadata) || - WARN_ON(dst->metadata != dst_enable_map->metadata)) - return; - - metadata = dst->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u32 *dst_blk; - const u32 *src_blk; - size_t hdr_cnt; - size_t ctr_cnt; - - if (!kbase_hwcnt_enable_map_block_enabled( - dst_enable_map, grp, blk, blk_inst)) - continue; - - dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - hdr_cnt = kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); - ctr_cnt = kbase_hwcnt_metadata_block_counters_count( - metadata, grp, blk); - - kbase_hwcnt_dump_buffer_block_accumulate( - dst_blk, src_blk, hdr_cnt, ctr_cnt); - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); - -void kbase_hwcnt_dump_buffer_accumulate_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata *metadata; - size_t grp, 
blk, blk_inst; - - if (WARN_ON(!dst) || - WARN_ON(!src) || - WARN_ON(!dst_enable_map) || - WARN_ON(dst == src) || - WARN_ON(dst->metadata != src->metadata) || - WARN_ON(dst->metadata != dst_enable_map->metadata)) - return; - - metadata = dst->metadata; - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { - u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( - dst, grp, blk, blk_inst); - const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( - metadata, grp, blk); - size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( - metadata, grp, blk); - /* Align upwards to include padding bytes */ - ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); - - kbase_hwcnt_dump_buffer_block_accumulate_strict( - dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); - } -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_types.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_types.h deleted file mode 100644 index 4d78c8457574..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_types.h +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Hardware counter types. - * Contains structures for describing the physical layout of hardware counter - * dump buffers and enable maps within a system. - * - * Also contains helper functions for manipulation of these dump buffers and - * enable maps. - * - * Through use of these structures and functions, hardware counters can be - * enabled, copied, accumulated, and generally manipulated in a generic way, - * regardless of the physical counter dump layout. - * - * Terminology: - * - * Hardware Counter System: - * A collection of hardware counter groups, making a full hardware counter - * system. - * Hardware Counter Group: - * A group of Hardware Counter Blocks (e.g. a t62x might have more than one - * core group, so has one counter group per core group, where each group - * may have a different number and layout of counter blocks). - * Hardware Counter Block: - * A block of hardware counters (e.g. shader block, tiler block). - * Hardware Counter Block Instance: - * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have - * 4 shader block instances). - * - * Block Header: - * A header value inside a counter block. Headers don't count anything, - * so it is only valid to copy or zero them. Headers are always the first - * values in the block. 
- * Block Counter: - * A counter value inside a counter block. Counters can be zeroed, copied, - * or accumulated. Counters are always immediately after the headers in the - * block. - * Block Value: - * A catch-all term for block headers and block counters. - * - * Enable Map: - * An array of u64 bitfields, where each bit either enables exactly one - * block value, or is unused (padding). - * Dump Buffer: - * An array of u32 values, where each u32 corresponds either to one block - * value, or is unused (padding). - * Availability Mask: - * A bitfield, where each bit corresponds to whether a block instance is - * physically available (e.g. an MP3 GPU may have a sparse core mask of - * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the - * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this - * case, the availability mask might be 0b1011111 (the exact layout will - * depend on the specific hardware architecture), with the 3 extra early bits - * corresponding to other block instances in the hardware counter system). - * Metadata: - * Structure describing the physical layout of the enable map and dump buffers - * for a specific hardware counter system. - * - */ - -#ifndef _KBASE_HWCNT_TYPES_H_ -#define _KBASE_HWCNT_TYPES_H_ - -#include -#include -#include -#include -#include "mali_malisw.h" - -/* Number of bytes in each bitfield */ -#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) - -/* Number of bits in each bitfield */ -#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) - -/* Number of bytes for each counter value */ -#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32)) - -/* Number of bits in an availability mask (i.e. max total number of block - * instances supported in a Hardware Counter System) - */ -#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) - -/** - * struct kbase_hwcnt_block_description - Description of one or more identical, - * contiguous, Hardware Counter Blocks. - * @type: The arbitrary identifier used to identify the type of the block. - * @inst_cnt: The number of Instances of the block. - * @hdr_cnt: The number of 32-bit Block Headers in the block. - * @ctr_cnt: The number of 32-bit Block Counters in the block. - */ -struct kbase_hwcnt_block_description { - u64 type; - size_t inst_cnt; - size_t hdr_cnt; - size_t ctr_cnt; -}; - -/** - * struct kbase_hwcnt_group_description - Description of one or more identical, - * contiguous Hardware Counter Groups. - * @type: The arbitrary identifier used to identify the type of the group. - * @blk_cnt: The number of types of Hardware Counter Block in the group. - * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, - * describing each type of Hardware Counter Block in the group. - */ -struct kbase_hwcnt_group_description { - u64 type; - size_t blk_cnt; - const struct kbase_hwcnt_block_description *blks; -}; - -/** - * struct kbase_hwcnt_description - Description of a Hardware Counter System. - * @grp_cnt: The number of Hardware Counter Groups. - * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, - * describing each Hardware Counter Group in the system. - * @avail_mask: Flat Availability Mask for all block instances in the system. 
- */ -struct kbase_hwcnt_description { - size_t grp_cnt; - const struct kbase_hwcnt_group_description *grps; - u64 avail_mask; -}; - -/** - * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout - * of a block in a Hardware Counter System's - * Dump Buffers and Enable Maps. - * @type: The arbitrary identifier used to identify the type of the - * block. - * @inst_cnt: The number of Instances of the block. - * @hdr_cnt: The number of 32-bit Block Headers in the block. - * @ctr_cnt: The number of 32-bit Block Counters in the block. - * @enable_map_index: Index in u64s into the parent's Enable Map where the - * Enable Map bitfields of the Block Instances described by - * this metadata start. - * @enable_map_stride: Stride in u64s between the Enable Maps of each of the - * Block Instances described by this metadata. - * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the - * Dump Buffers of the Block Instances described by this - * metadata start. - * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the - * Block Instances described by this metadata. - * @avail_mask_index: Index in bits into the parent's Availability Mask where - * the Availability Masks of the Block Instances described - * by this metadata start. - */ -struct kbase_hwcnt_block_metadata { - u64 type; - size_t inst_cnt; - size_t hdr_cnt; - size_t ctr_cnt; - size_t enable_map_index; - size_t enable_map_stride; - size_t dump_buf_index; - size_t dump_buf_stride; - size_t avail_mask_index; -}; - -/** - * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout - * of a group of blocks in a Hardware - * Counter System's Dump Buffers and Enable - * Maps. - * @type: The arbitrary identifier used to identify the type of the - * group. - * @blk_cnt: The number of types of Hardware Counter Block in the - * group. - * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, - * describing the physical layout of each type of Hardware - * Counter Block in the group. - * @enable_map_index: Index in u64s into the parent's Enable Map where the - * Enable Maps of the blocks within the group described by - * this metadata start. - * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the - * Dump Buffers of the blocks within the group described by - * metadata start. - * @avail_mask_index: Index in bits into the parent's Availability Mask where - * the Availability Masks of the blocks within the group - * described by this metadata start. - */ -struct kbase_hwcnt_group_metadata { - u64 type; - size_t blk_cnt; - const struct kbase_hwcnt_block_metadata *blk_metadata; - size_t enable_map_index; - size_t dump_buf_index; - size_t avail_mask_index; -}; - -/** - * struct kbase_hwcnt_metadata - Metadata describing the physical layout - * of Dump Buffers and Enable Maps within a - * Hardware Counter System. - * @grp_cnt: The number of Hardware Counter Groups. - * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, - * describing the physical layout of each Hardware Counter - * Group in the system. - * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. - * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. - * @avail_mask: The Availability Mask for the system. 
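
For reference, this is roughly how a caller could fill in the description structures defined above for a toy system: one group holding a single front-end block plus four instances of a core block. It is a sketch that assumes this header is included rather than a standalone program; the type IDs and counter counts are invented purely for illustration, and ARRAY_SIZE() is the usual kernel helper.

static const struct kbase_hwcnt_block_description toy_blks[] = {
	/* Invented block types and header/counter counts, for illustration only */
	{ .type = 0x1, .inst_cnt = 1, .hdr_cnt = 4, .ctr_cnt = 60 },
	{ .type = 0x2, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60 },
};

static const struct kbase_hwcnt_group_description toy_grp = {
	.type = 0x10,
	.blk_cnt = ARRAY_SIZE(toy_blks),
	.blks = toy_blks,
};

static const struct kbase_hwcnt_description toy_desc = {
	.grp_cnt = 1,
	.grps = &toy_grp,
	.avail_mask = 0x1F, /* all five block instances physically present */
};

Passing &toy_desc to kbase_hwcnt_metadata_create() would then yield metadata describing five block instances with a five-bit availability mask.
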
- */ -struct kbase_hwcnt_metadata { - size_t grp_cnt; - const struct kbase_hwcnt_group_metadata *grp_metadata; - size_t enable_map_bytes; - size_t dump_buf_bytes; - u64 avail_mask; -}; - -/** - * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 - * bitfields. - * @metadata: Non-NULL pointer to metadata used to identify, and to describe - * the layout of the enable map. - * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array - * of u64 bitfields, each bit of which enables one hardware - * counter. - */ -struct kbase_hwcnt_enable_map { - const struct kbase_hwcnt_metadata *metadata; - u64 *enable_map; -}; - -/** - * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32 - * values. - * @metadata: Non-NULL pointer to metadata used to identify, and to describe - * the layout of the Dump Buffer. - * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array - * of u32 values. - */ -struct kbase_hwcnt_dump_buffer { - const struct kbase_hwcnt_metadata *metadata; - u32 *dump_buf; -}; - -/** - * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. - * @page_addr: Address of allocated pages. A single allocation is used for all - * Dump Buffers in the array. - * @page_order: The allocation order of the pages. - * @buf_cnt: The number of allocated Dump Buffers. - * @bufs: Non-NULL pointer to the array of Dump Buffers. - */ -struct kbase_hwcnt_dump_buffer_array { - unsigned long page_addr; - unsigned int page_order; - size_t buf_cnt; - struct kbase_hwcnt_dump_buffer *bufs; -}; - -/** - * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object - * from a description. - * @desc: Non-NULL pointer to a hardware counter description. - * @metadata: Non-NULL pointer to where created metadata will be stored on - * success. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_metadata_create( - const struct kbase_hwcnt_description *desc, - const struct kbase_hwcnt_metadata **metadata); - -/** - * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. - * @metadata: Pointer to hardware counter metadata - */ -void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); - -/** - * kbase_hwcnt_metadata_group_count() - Get the number of groups. - * @metadata: Non-NULL pointer to metadata. - * - * Return: Number of hardware counter groups described by metadata. - */ -#define kbase_hwcnt_metadata_group_count(metadata) \ - ((metadata)->grp_cnt) - -/** - * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * - * Return: Type of the group grp. - */ -#define kbase_hwcnt_metadata_group_type(metadata, grp) \ - ((metadata)->grp_metadata[(grp)].type) - -/** - * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * - * Return: Number of blocks in group grp. - */ -#define kbase_hwcnt_metadata_block_count(metadata, grp) \ - ((metadata)->grp_metadata[(grp)].blk_cnt) - -/** - * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * - * Return: Type of the block blk in group grp. 
- */ -#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \ - ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type) - -/** - * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of - * a block. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * - * Return: Number of instances of block blk in group grp. - */ -#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \ - ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt) - -/** - * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter - * headers. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * - * Return: Number of u32 counter headers in each instance of block blk in - * group grp. - */ -#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ - ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) - -/** - * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * - * Return: Number of u32 counters in each instance of block blk in group - * grp. - */ -#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ - ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) - -/** - * kbase_hwcnt_metadata_block_values_count() - Get the number of values. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * - * Return: Number of u32 headers plus counters in each instance of block blk - * in group grp. - */ -#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ - (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \ - + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk))) - -/** - * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in - * the metadata. - * @md: Non-NULL pointer to metadata. - * @grp: size_t variable used as group iterator. - * @blk: size_t variable used as block iterator. - * @blk_inst: size_t variable used as block instance iterator. - * - * Iteration order is group, then block, then block instance (i.e. linearly - * through memory). - */ -#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ - for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ - for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ - for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++) - -/** - * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail - * mask corresponding to the block. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * - * Return: The bit index into the avail mask for the block. - */ -static inline size_t kbase_hwcnt_metadata_block_avail_bit( - const struct kbase_hwcnt_metadata *metadata, - size_t grp, - size_t blk) -{ - const size_t bit = - metadata->grp_metadata[grp].avail_mask_index + - metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; - - return bit; -} - -/** - * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is - * available. - * @metadata: Non-NULL pointer to metadata. 
- * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - * - * Return: true if the block instance is available, else false. - */ -static inline bool kbase_hwcnt_metadata_block_instance_avail( - const struct kbase_hwcnt_metadata *metadata, - size_t grp, - size_t blk, - size_t blk_inst) -{ - const size_t bit = kbase_hwcnt_metadata_block_avail_bit( - metadata, grp, blk) + blk_inst; - const u64 mask = 1ull << bit; - - return (metadata->avail_mask & mask) != 0; -} - -/** - * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. - * @metadata: Non-NULL pointer to metadata describing the system. - * @enable_map: Non-NULL pointer to enable map to be initialised. Will be - * initialised to all zeroes (i.e. all counters disabled). - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_enable_map_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_enable_map *enable_map); - -/** - * kbase_hwcnt_enable_map_free() - Free an enable map. - * @enable_map: Enable map to be freed. - * - * Can be safely called on an all-zeroed enable map structure, or on an already - * freed enable map. - */ -void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); - -/** - * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block - * instance's enable map. - * @map: Non-NULL pointer to (const) enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - * - * Return: (const) u64* to the bitfield(s) used as the enable map for the - * block instance. - */ -#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ - ((map)->enable_map + \ - (map)->metadata->grp_metadata[(grp)].enable_map_index + \ - (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ - (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) - -/** - * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required - * to have at minimum one bit per value. - * @val_cnt: Number of values. - * - * Return: Number of required bitfields. - */ -static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) -{ - return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / - KBASE_HWCNT_BITFIELD_BITS; -} - -/** - * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. - * @dst: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - */ -static inline void kbase_hwcnt_enable_map_block_disable_all( - struct kbase_hwcnt_enable_map *dst, - size_t grp, - size_t blk, - size_t blk_inst) -{ - const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - dst->metadata, grp, blk); - const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); - u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( - dst, grp, blk, blk_inst); - - memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); -} - -/** - * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. - * @dst: Non-NULL pointer to enable map to zero. - */ -static inline void kbase_hwcnt_enable_map_disable_all( - struct kbase_hwcnt_enable_map *dst) -{ - memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); -} - -/** - * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. 
- * @dst: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - */ -static inline void kbase_hwcnt_enable_map_block_enable_all( - struct kbase_hwcnt_enable_map *dst, - size_t grp, - size_t blk, - size_t blk_inst) -{ - const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - dst->metadata, grp, blk); - const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); - u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( - dst, grp, blk, blk_inst); - - size_t bitfld_idx; - - for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { - const u64 remaining_values = val_cnt - - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); - u64 block_enable_map_mask = U64_MAX; - - if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) - block_enable_map_mask = (1ull << remaining_values) - 1; - - block_enable_map[bitfld_idx] = block_enable_map_mask; - } -} - -/** - * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in an enable - * map. - * @dst: Non-NULL pointer to enable map. - */ -static inline void kbase_hwcnt_enable_map_enable_all( - struct kbase_hwcnt_enable_map *dst) -{ - size_t grp, blk, blk_inst; - - kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) - kbase_hwcnt_enable_map_block_enable_all( - dst, grp, blk, blk_inst); -} - -/** - * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. - * @dst: Non-NULL pointer to destination enable map. - * @src: Non-NULL pointer to source enable map. - * - * The dst and src MUST have been created from the same metadata. - */ -static inline void kbase_hwcnt_enable_map_copy( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_enable_map *src) -{ - memcpy(dst->enable_map, - src->enable_map, - dst->metadata->enable_map_bytes); -} - -/** - * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. - * @dst: Non-NULL pointer to destination enable map. - * @src: Non-NULL pointer to source enable map. - * - * The dst and src MUST have been created from the same metadata. - */ -static inline void kbase_hwcnt_enable_map_union( - struct kbase_hwcnt_enable_map *dst, - const struct kbase_hwcnt_enable_map *src) -{ - const size_t bitfld_count = - dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; - size_t i; - - for (i = 0; i < bitfld_count; i++) - dst->enable_map[i] |= src->enable_map[i]; -} - -/** - * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block - * instance are enabled. - * @enable_map: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - * - * Return: true if any values in the block are enabled, else false. 
- */ -static inline bool kbase_hwcnt_enable_map_block_enabled( - const struct kbase_hwcnt_enable_map *enable_map, - size_t grp, - size_t blk, - size_t blk_inst) -{ - bool any_enabled = false; - const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( - enable_map->metadata, grp, blk); - const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); - const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( - enable_map, grp, blk, blk_inst); - - size_t bitfld_idx; - - for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { - const u64 remaining_values = val_cnt - - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); - u64 block_enable_map_mask = U64_MAX; - - if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) - block_enable_map_mask = (1ull << remaining_values) - 1; - - any_enabled = any_enabled || - (block_enable_map[bitfld_idx] & block_enable_map_mask); - } - - return any_enabled; -} - -/** - * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. - * @enable_map: Non-NULL pointer to enable map. - * - * Return: true if any values are enabled, else false. - */ -static inline bool kbase_hwcnt_enable_map_any_enabled( - const struct kbase_hwcnt_enable_map *enable_map) -{ - size_t grp, blk, blk_inst; - - kbase_hwcnt_metadata_for_each_block( - enable_map->metadata, grp, blk, blk_inst) { - if (kbase_hwcnt_enable_map_block_enabled( - enable_map, grp, blk, blk_inst)) - return true; - } - - return false; -} - -/** - * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block - * instance is enabled. - * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_idx: Index of the value to check in the block instance. - * - * Return: true if the value was enabled, else false. - */ -static inline bool kbase_hwcnt_enable_map_block_value_enabled( - const u64 *bitfld, - size_t val_idx) -{ - const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; - const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; - const u64 mask = 1ull << bit; - - return (bitfld[idx] & mask) != 0; -} - -/** - * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block - * instance. - * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_idx: Index of the value to enable in the block instance. - */ -static inline void kbase_hwcnt_enable_map_block_enable_value( - u64 *bitfld, - size_t val_idx) -{ - const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; - const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; - const u64 mask = 1ull << bit; - - bitfld[idx] |= mask; -} - -/** - * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block - * instance. - * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_idx: Index of the value to disable in the block instance. - */ -static inline void kbase_hwcnt_enable_map_block_disable_value( - u64 *bitfld, - size_t val_idx) -{ - const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; - const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; - const u64 mask = 1ull << bit; - - bitfld[idx] &= ~mask; -} - -/** - * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. - * @metadata: Non-NULL pointer to metadata describing the system. - * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be - * initialised to undefined values, so must be used as a copy dest, - * or cleared before use. 
- * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_dump_buffer *dump_buf); - -/** - * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. - * @dump_buf: Dump buffer to be freed. - * - * Can be safely called on an all-zeroed dump buffer structure, or on an already - * freed dump buffer. - */ -void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); - -/** - * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. - * @metadata: Non-NULL pointer to metadata describing the system. - * @n: Number of dump buffers to allocate - * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each - * dump buffer in the array will be initialised to undefined values, - * so must be used as a copy dest, or cleared before use. - * - * A single contiguous page allocation will be used for all of the buffers - * inside the array, where: - * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_array_alloc( - const struct kbase_hwcnt_metadata *metadata, - size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. - * @dump_bufs: Dump buffer array to be freed. - * - * Can be safely called on an all-zeroed dump buffer array structure, or on an - * already freed dump buffer array. - */ -void kbase_hwcnt_dump_buffer_array_free( - struct kbase_hwcnt_dump_buffer_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block - * instance's dump buffer. - * @buf: Non-NULL pointer to (const) dump buffer. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - * - * Return: (const) u32* to the dump buffer for the block instance. - */ -#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \ - ((buf)->dump_buf + \ - (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \ - (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \ - (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst)) - -/** - * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. - * After the operation, all non-enabled values - * will be undefined. - * @dst: Non-NULL pointer to dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * The dst and dst_enable_map MUST have been created from the same metadata. - */ -void kbase_hwcnt_dump_buffer_zero( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -/** - * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. - * @dst_blk: Non-NULL pointer to dst block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @val_cnt: Number of values in the block. - */ -static inline void kbase_hwcnt_dump_buffer_block_zero( - u32 *dst_blk, - size_t val_cnt) -{ - memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); -} - -/** - * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. - * After the operation, all values - * (including padding bytes) will be - * zero. - * Slower than the non-strict variant. - * @dst: Non-NULL pointer to dump buffer. 
- */ -void kbase_hwcnt_dump_buffer_zero_strict( - struct kbase_hwcnt_dump_buffer *dst); - -/** - * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in - * dst (including padding bytes and - * unavailable blocks). - * After the operation, all enabled - * values will be unchanged. - * @dst: Non-NULL pointer to dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * The dst and dst_enable_map MUST have been created from the same metadata. - */ -void kbase_hwcnt_dump_buffer_zero_non_enabled( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -/** - * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled - * values in a block. - * After the operation, all - * enabled values will be - * unchanged. - * @dst_blk: Non-NULL pointer to dst block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_cnt: Number of values in the block. - */ -static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled( - u32 *dst_blk, - const u64 *blk_em, - size_t val_cnt) -{ - size_t val; - - for (val = 0; val < val_cnt; val++) { - if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) - dst_blk[val] = 0; - } -} - -/** - * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. - * After the operation, all non-enabled values - * will be undefined. - * @dst: Non-NULL pointer to dst dump buffer. - * @src: Non-NULL pointer to src dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * The dst, src, and dst_enable_map MUST have been created from the same - * metadata. - */ -void kbase_hwcnt_dump_buffer_copy( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -/** - * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. - * @dst_blk: Non-NULL pointer to dst block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @src_blk: Non-NULL pointer to src block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @val_cnt: Number of values in the block. - */ -static inline void kbase_hwcnt_dump_buffer_block_copy( - u32 *dst_blk, - const u32 *src_blk, - size_t val_cnt) -{ - /* Copy all the counters in the block instance. - * Values of non-enabled counters are undefined. - */ - memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); -} - -/** - * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to - * dst. - * After the operation, all non-enabled - * values (including padding bytes) will - * be zero. - * Slower than the non-strict variant. - * @dst: Non-NULL pointer to dst dump buffer. - * @src: Non-NULL pointer to src dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * The dst, src, and dst_enable_map MUST have been created from the same - * metadata. - */ -void kbase_hwcnt_dump_buffer_copy_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -/** - * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values - * from src to dst. - * After the operation, all - * non-enabled values will be - * zero. 
- * @dst_blk: Non-NULL pointer to dst block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @src_blk: Non-NULL pointer to src block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_cnt: Number of values in the block. - * - * After the copy, any disabled values in dst will be zero. - */ -static inline void kbase_hwcnt_dump_buffer_block_copy_strict( - u32 *dst_blk, - const u32 *src_blk, - const u64 *blk_em, - size_t val_cnt) -{ - size_t val; - - for (val = 0; val < val_cnt; val++) { - bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled( - blk_em, val); - - dst_blk[val] = val_enabled ? src_blk[val] : 0; - } -} - -/** - * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and - * accumulate all enabled counters from - * src to dst. - * After the operation, all non-enabled - * values will be undefined. - * @dst: Non-NULL pointer to dst dump buffer. - * @src: Non-NULL pointer to src dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * The dst, src, and dst_enable_map MUST have been created from the same - * metadata. - */ -void kbase_hwcnt_dump_buffer_accumulate( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -/** - * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and - * accumulate all block counters - * from src to dst. - * @dst_blk: Non-NULL pointer to dst block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @src_blk: Non-NULL pointer to src block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @hdr_cnt: Number of headers in the block. - * @ctr_cnt: Number of counters in the block. - */ -static inline void kbase_hwcnt_dump_buffer_block_accumulate( - u32 *dst_blk, - const u32 *src_blk, - size_t hdr_cnt, - size_t ctr_cnt) -{ - size_t ctr; - /* Copy all the headers in the block instance. - * Values of non-enabled headers are undefined. - */ - memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); - - /* Accumulate all the counters in the block instance. - * Values of non-enabled counters are undefined. - */ - for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { - u32 *dst_ctr = dst_blk + ctr; - const u32 *src_ctr = src_blk + ctr; - - const u32 src_counter = *src_ctr; - const u32 dst_counter = *dst_ctr; - - /* Saturating add */ - u32 accumulated = src_counter + dst_counter; - - if (accumulated < src_counter) - accumulated = U32_MAX; - - *dst_ctr = accumulated; - } -} - -/** - * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and - * accumulate all enabled counters - * from src to dst. - * After the operation, all - * non-enabled values (including - * padding bytes) will be zero. - * Slower than the non-strict - * variant. - * @dst: Non-NULL pointer to dst dump buffer. - * @src: Non-NULL pointer to src dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * The dst, src, and dst_enable_map MUST have been created from the same - * metadata. 
- */ -void kbase_hwcnt_dump_buffer_accumulate_strict( - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -/** - * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block - * headers and accumulate - * all block counters from - * src to dst. - * After the operation, all - * non-enabled values will - * be zero. - * @dst_blk: Non-NULL pointer to dst block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @src_blk: Non-NULL pointer to src block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @hdr_cnt: Number of headers in the block. - * @ctr_cnt: Number of counters in the block. - */ -static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( - u32 *dst_blk, - const u32 *src_blk, - const u64 *blk_em, - size_t hdr_cnt, - size_t ctr_cnt) -{ - size_t ctr; - - kbase_hwcnt_dump_buffer_block_copy_strict( - dst_blk, src_blk, blk_em, hdr_cnt); - - for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { - bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled( - blk_em, ctr); - - u32 *dst_ctr = dst_blk + ctr; - const u32 *src_ctr = src_blk + ctr; - - const u32 src_counter = *src_ctr; - const u32 dst_counter = *dst_ctr; - - /* Saturating add */ - u32 accumulated = src_counter + dst_counter; - - if (accumulated < src_counter) - accumulated = U32_MAX; - - *dst_ctr = ctr_enabled ? accumulated : 0; - } -} - -#endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_virtualizer.c deleted file mode 100644 index 26e9852177cb..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_virtualizer.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_accumulator.h" -#include "mali_kbase_hwcnt_context.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_malisw.h" -#include "mali_kbase_debug.h" -#include "mali_kbase_linux.h" - -#include -#include - -/** - * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. - * @hctx: Hardware counter context being virtualized. - * @metadata: Hardware counter metadata. - * @lock: Lock acquired at all entrypoints, to protect mutable state. - * @client_count: Current number of virtualizer clients. - * @clients: List of virtualizer clients. - * @accum: Hardware counter accumulator. NULL if no clients. - * @scratch_map: Enable map used as scratch space during counter changes. 
- * @scratch_buf: Dump buffer used as scratch space during dumps. - */ -struct kbase_hwcnt_virtualizer { - struct kbase_hwcnt_context *hctx; - const struct kbase_hwcnt_metadata *metadata; - struct mutex lock; - size_t client_count; - struct list_head clients; - struct kbase_hwcnt_accumulator *accum; - struct kbase_hwcnt_enable_map scratch_map; - struct kbase_hwcnt_dump_buffer scratch_buf; -}; - -/** - * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. - * @node: List node used for virtualizer client list. - * @hvirt: Hardware counter virtualizer. - * @enable_map: Enable map with client's current enabled counters. - * @accum_buf: Dump buffer with client's current accumulated counters. - * @has_accum: True if accum_buf contains any accumulated counters. - * @ts_start_ns: Counter collection start time of current dump. - */ -struct kbase_hwcnt_virtualizer_client { - struct list_head node; - struct kbase_hwcnt_virtualizer *hvirt; - struct kbase_hwcnt_enable_map enable_map; - struct kbase_hwcnt_dump_buffer accum_buf; - bool has_accum; - u64 ts_start_ns; -}; - -const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( - struct kbase_hwcnt_virtualizer *hvirt) -{ - if (!hvirt) - return NULL; - - return hvirt->metadata; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata); - -/** - * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. - * @hvcli: Pointer to virtualizer client. - * - * Will safely free a client in any partial state of construction. - */ -static void kbasep_hwcnt_virtualizer_client_free( - struct kbase_hwcnt_virtualizer_client *hvcli) -{ - if (!hvcli) - return; - - kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); - kbase_hwcnt_enable_map_free(&hvcli->enable_map); - kfree(hvcli); -} - -/** - * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer - * client. - * @metadata: Non-NULL pointer to counter metadata. - * @out_hvcli: Non-NULL pointer to where created client will be stored on - * success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_virtualizer_client_alloc( - const struct kbase_hwcnt_metadata *metadata, - struct kbase_hwcnt_virtualizer_client **out_hvcli) -{ - int errcode; - struct kbase_hwcnt_virtualizer_client *hvcli = NULL; - - WARN_ON(!metadata); - WARN_ON(!out_hvcli); - - hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); - if (!hvcli) - return -ENOMEM; - - errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); - if (errcode) - goto error; - - errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); - if (errcode) - goto error; - - *out_hvcli = hvcli; - return 0; -error: - kbasep_hwcnt_virtualizer_client_free(hvcli); - return errcode; -} - -/** - * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a - * client's accumulation buffer. - * @hvcli: Non-NULL pointer to virtualizer client. - * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. 
- */ -static void kbasep_hwcnt_virtualizer_client_accumulate( - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_dump_buffer *dump_buf) -{ - WARN_ON(!hvcli); - WARN_ON(!dump_buf); - lockdep_assert_held(&hvcli->hvirt->lock); - - if (hvcli->has_accum) { - /* If already some accumulation, accumulate */ - kbase_hwcnt_dump_buffer_accumulate( - &hvcli->accum_buf, dump_buf, &hvcli->enable_map); - } else { - /* If no accumulation, copy */ - kbase_hwcnt_dump_buffer_copy( - &hvcli->accum_buf, dump_buf, &hvcli->enable_map); - } - hvcli->has_accum = true; -} - -/** - * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter - * accumulator after final client - * removal. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * - * Will safely terminate the accumulator in any partial state of initialisation. - */ -static void kbasep_hwcnt_virtualizer_accumulator_term( - struct kbase_hwcnt_virtualizer *hvirt) -{ - WARN_ON(!hvirt); - lockdep_assert_held(&hvirt->lock); - WARN_ON(hvirt->client_count); - - kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); - kbase_hwcnt_enable_map_free(&hvirt->scratch_map); - kbase_hwcnt_accumulator_release(hvirt->accum); - hvirt->accum = NULL; -} - -/** - * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter - * accumulator before first client - * addition. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_virtualizer_accumulator_init( - struct kbase_hwcnt_virtualizer *hvirt) -{ - int errcode; - - WARN_ON(!hvirt); - lockdep_assert_held(&hvirt->lock); - WARN_ON(hvirt->client_count); - WARN_ON(hvirt->accum); - - errcode = kbase_hwcnt_accumulator_acquire( - hvirt->hctx, &hvirt->accum); - if (errcode) - goto error; - - errcode = kbase_hwcnt_enable_map_alloc( - hvirt->metadata, &hvirt->scratch_map); - if (errcode) - goto error; - - errcode = kbase_hwcnt_dump_buffer_alloc( - hvirt->metadata, &hvirt->scratch_buf); - if (errcode) - goto error; - - return 0; -error: - kbasep_hwcnt_virtualizer_accumulator_term(hvirt); - return errcode; -} - -/** - * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the - * virtualizer. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @hvcli: Non-NULL pointer to the virtualizer client to add. - * @enable_map: Non-NULL pointer to client's initial enable map. - * - * Return: 0 on success, else error code. 
- */ -static int kbasep_hwcnt_virtualizer_client_add( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map) -{ - int errcode = 0; - u64 ts_start_ns; - u64 ts_end_ns; - - WARN_ON(!hvirt); - WARN_ON(!hvcli); - WARN_ON(!enable_map); - lockdep_assert_held(&hvirt->lock); - - if (hvirt->client_count == 0) - /* First client added, so initialise the accumulator */ - errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); - if (errcode) - return errcode; - - hvirt->client_count += 1; - - if (hvirt->client_count == 1) { - /* First client, so just pass the enable map onwards as is */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - enable_map, &ts_start_ns, &ts_end_ns, NULL); - } else { - struct kbase_hwcnt_virtualizer_client *pos; - - /* Make the scratch enable map the union of all enable maps */ - kbase_hwcnt_enable_map_copy( - &hvirt->scratch_map, enable_map); - list_for_each_entry(pos, &hvirt->clients, node) - kbase_hwcnt_enable_map_union( - &hvirt->scratch_map, &pos->enable_map); - - /* Set the counters with the new union enable map */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - &hvirt->scratch_map, - &ts_start_ns, &ts_end_ns, - &hvirt->scratch_buf); - /* Accumulate into only existing clients' accumulation bufs */ - if (!errcode) - list_for_each_entry(pos, &hvirt->clients, node) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); - } - if (errcode) - goto error; - - list_add(&hvcli->node, &hvirt->clients); - hvcli->hvirt = hvirt; - kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); - hvcli->has_accum = false; - hvcli->ts_start_ns = ts_end_ns; - - return 0; -error: - hvirt->client_count -= 1; - if (hvirt->client_count == 0) - kbasep_hwcnt_virtualizer_accumulator_term(hvirt); - return errcode; -} - -/** - * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the - * virtualizer. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @hvcli: Non-NULL pointer to the virtualizer client to remove. - */ -static void kbasep_hwcnt_virtualizer_client_remove( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli) -{ - int errcode = 0; - u64 ts_start_ns; - u64 ts_end_ns; - - WARN_ON(!hvirt); - WARN_ON(!hvcli); - lockdep_assert_held(&hvirt->lock); - - list_del(&hvcli->node); - hvirt->client_count -= 1; - - if (hvirt->client_count == 0) { - /* Last client removed, so terminate the accumulator */ - kbasep_hwcnt_virtualizer_accumulator_term(hvirt); - } else { - struct kbase_hwcnt_virtualizer_client *pos; - /* Make the scratch enable map the union of all enable maps */ - kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); - list_for_each_entry(pos, &hvirt->clients, node) - kbase_hwcnt_enable_map_union( - &hvirt->scratch_map, &pos->enable_map); - /* Set the counters with the new union enable map */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - &hvirt->scratch_map, - &ts_start_ns, &ts_end_ns, - &hvirt->scratch_buf); - /* Accumulate into remaining clients' accumulation bufs */ - if (!errcode) - list_for_each_entry(pos, &hvirt->clients, node) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); - } - WARN_ON(errcode); -} - -/** - * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's - * currently enabled counters, - * and enable a new set of - * counters that will be used for - * subsequent dumps. 
- * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @hvcli: Non-NULL pointer to the virtualizer client. - * @enable_map: Non-NULL pointer to the new counter enable map for the client. - * Must have the same metadata as the virtualizer. - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success. - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success. - * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * - * Return: 0 on success or error code. - */ -static int kbasep_hwcnt_virtualizer_client_set_counters( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - int errcode; - struct kbase_hwcnt_virtualizer_client *pos; - - WARN_ON(!hvirt); - WARN_ON(!hvcli); - WARN_ON(!enable_map); - WARN_ON(!ts_start_ns); - WARN_ON(!ts_end_ns); - WARN_ON(enable_map->metadata != hvirt->metadata); - WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); - lockdep_assert_held(&hvirt->lock); - - /* Make the scratch enable map the union of all enable maps */ - kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); - list_for_each_entry(pos, &hvirt->clients, node) - /* Ignore the enable map of the selected client */ - if (pos != hvcli) - kbase_hwcnt_enable_map_union( - &hvirt->scratch_map, &pos->enable_map); - - /* Set the counters with the new union enable map */ - errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, - &hvirt->scratch_map, ts_start_ns, ts_end_ns, - &hvirt->scratch_buf); - if (errcode) - return errcode; - - /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry(pos, &hvirt->clients, node) - if (pos != hvcli) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); - - /* Finally, write into the dump buf */ - if (dump_buf) { - const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; - - if (hvcli->has_accum) { - kbase_hwcnt_dump_buffer_accumulate( - &hvcli->accum_buf, src, &hvcli->enable_map); - src = &hvcli->accum_buf; - } - kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); - } - hvcli->has_accum = false; - - /* Update the selected client's enable map */ - kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); - - /* Fix up the timestamps */ - *ts_start_ns = hvcli->ts_start_ns; - hvcli->ts_start_ns = *ts_end_ns; - - return errcode; -} - -int kbase_hwcnt_virtualizer_client_set_counters( - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - int errcode; - struct kbase_hwcnt_virtualizer *hvirt; - - if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) - return -EINVAL; - - hvirt = hvcli->hvirt; - - if ((enable_map->metadata != hvirt->metadata) || - (dump_buf && (dump_buf->metadata != hvirt->metadata))) - return -EINVAL; - - mutex_lock(&hvirt->lock); - - if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { - /* - * If there's only one client with no prior accumulation, we can - * completely skip the virtualize and just pass through the call - * to the accumulator, saving a fair few copies and - * accumulations. 
- */ - errcode = kbase_hwcnt_accumulator_set_counters( - hvirt->accum, enable_map, - ts_start_ns, ts_end_ns, dump_buf); - - if (!errcode) { - /* Update the selected client's enable map */ - kbase_hwcnt_enable_map_copy( - &hvcli->enable_map, enable_map); - - /* Fix up the timestamps */ - *ts_start_ns = hvcli->ts_start_ns; - hvcli->ts_start_ns = *ts_end_ns; - } - } else { - /* Otherwise, do the full virtualize */ - errcode = kbasep_hwcnt_virtualizer_client_set_counters( - hvirt, hvcli, enable_map, - ts_start_ns, ts_end_ns, dump_buf); - } - - mutex_unlock(&hvirt->lock); - - return errcode; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters); - -/** - * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's - * currently enabled counters. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @hvcli: Non-NULL pointer to the virtualizer client. - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success. - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success. - * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * - * Return: 0 on success or error code. - */ -static int kbasep_hwcnt_virtualizer_client_dump( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - int errcode; - struct kbase_hwcnt_virtualizer_client *pos; - - WARN_ON(!hvirt); - WARN_ON(!hvcli); - WARN_ON(!ts_start_ns); - WARN_ON(!ts_end_ns); - WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); - lockdep_assert_held(&hvirt->lock); - - /* Perform the dump */ - errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, - ts_start_ns, ts_end_ns, &hvirt->scratch_buf); - if (errcode) - return errcode; - - /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry(pos, &hvirt->clients, node) - if (pos != hvcli) - kbasep_hwcnt_virtualizer_client_accumulate( - pos, &hvirt->scratch_buf); - - /* Finally, write into the dump buf */ - if (dump_buf) { - const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; - - if (hvcli->has_accum) { - kbase_hwcnt_dump_buffer_accumulate( - &hvcli->accum_buf, src, &hvcli->enable_map); - src = &hvcli->accum_buf; - } - kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); - } - hvcli->has_accum = false; - - /* Fix up the timestamps */ - *ts_start_ns = hvcli->ts_start_ns; - hvcli->ts_start_ns = *ts_end_ns; - - return errcode; -} - -int kbase_hwcnt_virtualizer_client_dump( - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf) -{ - int errcode; - struct kbase_hwcnt_virtualizer *hvirt; - - if (!hvcli || !ts_start_ns || !ts_end_ns) - return -EINVAL; - - hvirt = hvcli->hvirt; - - if (dump_buf && (dump_buf->metadata != hvirt->metadata)) - return -EINVAL; - - mutex_lock(&hvirt->lock); - - if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { - /* - * If there's only one client with no prior accumulation, we can - * completely skip the virtualize and just pass through the call - * to the accumulator, saving a fair few copies and - * accumulations. 
- */ - errcode = kbase_hwcnt_accumulator_dump( - hvirt->accum, ts_start_ns, ts_end_ns, dump_buf); - - if (!errcode) { - /* Fix up the timestamps */ - *ts_start_ns = hvcli->ts_start_ns; - hvcli->ts_start_ns = *ts_end_ns; - } - } else { - /* Otherwise, do the full virtualize */ - errcode = kbasep_hwcnt_virtualizer_client_dump( - hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); - } - - mutex_unlock(&hvirt->lock); - - return errcode; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump); - -int kbase_hwcnt_virtualizer_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_virtualizer_client **out_hvcli) -{ - int errcode; - struct kbase_hwcnt_virtualizer_client *hvcli; - - if (!hvirt || !enable_map || !out_hvcli || - (enable_map->metadata != hvirt->metadata)) - return -EINVAL; - - errcode = kbasep_hwcnt_virtualizer_client_alloc( - hvirt->metadata, &hvcli); - if (errcode) - return errcode; - - mutex_lock(&hvirt->lock); - - errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); - - mutex_unlock(&hvirt->lock); - - if (errcode) { - kbasep_hwcnt_virtualizer_client_free(hvcli); - return errcode; - } - - *out_hvcli = hvcli; - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create); - -void kbase_hwcnt_virtualizer_client_destroy( - struct kbase_hwcnt_virtualizer_client *hvcli) -{ - if (!hvcli) - return; - - mutex_lock(&hvcli->hvirt->lock); - - kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); - - mutex_unlock(&hvcli->hvirt->lock); - - kbasep_hwcnt_virtualizer_client_free(hvcli); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); - -int kbase_hwcnt_virtualizer_init( - struct kbase_hwcnt_context *hctx, - struct kbase_hwcnt_virtualizer **out_hvirt) -{ - struct kbase_hwcnt_virtualizer *virt; - const struct kbase_hwcnt_metadata *metadata; - - if (!hctx || !out_hvirt) - return -EINVAL; - - metadata = kbase_hwcnt_context_metadata(hctx); - if (!metadata) - return -EINVAL; - - virt = kzalloc(sizeof(*virt), GFP_KERNEL); - if (!virt) - return -ENOMEM; - - virt->hctx = hctx; - virt->metadata = metadata; - - mutex_init(&virt->lock); - INIT_LIST_HEAD(&virt->clients); - - *out_hvirt = virt; - return 0; -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init); - -void kbase_hwcnt_virtualizer_term( - struct kbase_hwcnt_virtualizer *hvirt) -{ - if (!hvirt) - return; - - /* Non-zero client count implies client leak */ - if (WARN_ON(hvirt->client_count != 0)) { - struct kbase_hwcnt_virtualizer_client *pos, *n; - - list_for_each_entry_safe(pos, n, &hvirt->clients, node) - kbase_hwcnt_virtualizer_client_destroy(pos); - } - - WARN_ON(hvirt->client_count != 0); - WARN_ON(hvirt->accum); - - kfree(hvirt); -} -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_virtualizer.h deleted file mode 100644 index 1efa81d0f64a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_hwcnt_virtualizer.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Hardware counter virtualizer API. - * - * Virtualizes a hardware counter context, so multiple clients can access - * a single hardware counter resource as though each was the exclusive user. - */ - -#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ -#define _KBASE_HWCNT_VIRTUALIZER_H_ - -#include - -struct kbase_hwcnt_context; -struct kbase_hwcnt_virtualizer; -struct kbase_hwcnt_virtualizer_client; -struct kbase_hwcnt_enable_map; -struct kbase_hwcnt_dump_buffer; - -/** - * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. - * @hctx: Non-NULL pointer to the hardware counter context to virtualize. - * @out_hvirt: Non-NULL pointer to where the pointer to the created virtualizer - * will be stored on success. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_virtualizer_init( - struct kbase_hwcnt_context *hctx, - struct kbase_hwcnt_virtualizer **out_hvirt); - -/** - * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. - * @hvirt: Pointer to virtualizer to be terminated. - */ -void kbase_hwcnt_virtualizer_term( - struct kbase_hwcnt_virtualizer *hvirt); - -/** - * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by - * the virtualizer, so related counter data - * structures can be created. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * - * Return: Non-NULL pointer to metadata, or NULL on error. - */ -const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( - struct kbase_hwcnt_virtualizer *hvirt); - -/** - * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @enable_map: Non-NULL pointer to the enable map for the client. Must have the - * same metadata as the virtualizer. - * @out_hvcli: Non-NULL pointer to where the pointer to the created client will - * be stored on success. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_virtualizer_client_create( - struct kbase_hwcnt_virtualizer *hvirt, - const struct kbase_hwcnt_enable_map *enable_map, - struct kbase_hwcnt_virtualizer_client **out_hvcli); - -/** - * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. - * @hvcli: Pointer to the hardware counter client. - */ -void kbase_hwcnt_virtualizer_client_destroy( - struct kbase_hwcnt_virtualizer_client *hvcli); - -/** - * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's - * currently enabled counters, and - * enable a new set of counters - * that will be used for - * subsequent dumps. - * @hvcli: Non-NULL pointer to the virtualizer client. - * @enable_map: Non-NULL pointer to the new counter enable map for the client. - * Must have the same metadata as the virtualizer. - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success. - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success. 
- * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * - * Return: 0 on success or error code. - */ -int kbase_hwcnt_virtualizer_client_set_counters( - struct kbase_hwcnt_virtualizer_client *hvcli, - const struct kbase_hwcnt_enable_map *enable_map, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); - -/** - * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's - * currently enabled counters. - * @hvcli: Non-NULL pointer to the virtualizer client. - * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will - * be written out to on success. - * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will - * be written out to on success. - * @dump_buf: Pointer to the buffer where the dump will be written out to on - * success. If non-NULL, must have the same metadata as the - * accumulator. If NULL, the dump will be discarded. - * - * Return: 0 on success or error code. - */ -int kbase_hwcnt_virtualizer_client_dump( - struct kbase_hwcnt_virtualizer_client *hvcli, - u64 *ts_start_ns, - u64 *ts_end_ns, - struct kbase_hwcnt_dump_buffer *dump_buf); - -#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_ioctl.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_ioctl.h deleted file mode 100755 index ccf67df923a0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_ioctl.h +++ /dev/null @@ -1,885 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_IOCTL_H_ -#define _KBASE_IOCTL_H_ - -#ifdef __cpluscplus -extern "C" { -#endif - -#include -#include - -#define KBASE_IOCTL_TYPE 0x80 - -/* - * 11.1: - * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags - * 11.2: - * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_SECURE, - * which some user-side clients prior to 11.2 might fault if they received - * them - * 11.3: - * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and - * KBASE_IOCTL_STICKY_RESOURCE_UNMAP - * 11.4: - * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET - * 11.5: - * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) - * 11.6: - * - Added flags field to base_jit_alloc_info structure, which can be used to - * specify pseudo chunked tiler alignment for JIT allocations. 
- * 11.7: - * - Removed UMP support - * 11.8: - * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags - * 11.9: - * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY - * under base_mem_alloc_flags - * 11.10: - * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for - * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations - * with one softjob. - * 11.11: - * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags - * 11.12: - * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS - * 11.13: - * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT - */ -#define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 13 - -/** - * struct kbase_ioctl_version_check - Check version compatibility with kernel - * - * @major: Major version number - * @minor: Minor version number - */ -struct kbase_ioctl_version_check { - __u16 major; - __u16 minor; -}; - -#define KBASE_IOCTL_VERSION_CHECK \ - _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) - -/** - * struct kbase_ioctl_set_flags - Set kernel context creation flags - * - * @create_flags: Flags - see base_context_create_flags - */ -struct kbase_ioctl_set_flags { - __u32 create_flags; -}; - -#define KBASE_IOCTL_SET_FLAGS \ - _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) - -/** - * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel - * - * @addr: Memory address of an array of struct base_jd_atom_v2 - * @nr_atoms: Number of entries in the array - * @stride: sizeof(struct base_jd_atom_v2) - */ -struct kbase_ioctl_job_submit { - __u64 addr; - __u32 nr_atoms; - __u32 stride; -}; - -#define KBASE_IOCTL_JOB_SUBMIT \ - _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) - -/** - * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel - * - * @buffer: Pointer to the buffer to store properties into - * @size: Size of the buffer - * @flags: Flags - must be zero for now - * - * The ioctl will return the number of bytes stored into @buffer or an error - * on failure (e.g. @size is too small). If @size is specified as 0 then no - * data will be written but the return value will be the number of bytes needed - * for all the properties. - * - * @flags may be used in the future to request a different format for the - * buffer. With @flags == 0 the following format is used. - * - * The buffer will be filled with pairs of values, a u32 key identifying the - * property followed by the value. The size of the value is identified using - * the bottom bits of the key. The value then immediately followed the key and - * is tightly packed (there is no padding). All keys and values are - * little-endian. 
- * - * 00 = u8 - * 01 = u16 - * 10 = u32 - * 11 = u64 - */ -struct kbase_ioctl_get_gpuprops { - __u64 buffer; - __u32 size; - __u32 flags; -}; - -#define KBASE_IOCTL_GET_GPUPROPS \ - _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) - -#define KBASE_IOCTL_POST_TERM \ - _IO(KBASE_IOCTL_TYPE, 4) - -/** - * union kbase_ioctl_mem_alloc - Allocate memory on the GPU - * - * @va_pages: The number of pages of virtual address space to reserve - * @commit_pages: The number of physical pages to allocate - * @extent: The number of extra pages to allocate on each GPU fault which grows - * the region - * @flags: Flags - * @gpu_va: The GPU virtual address which is allocated - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_mem_alloc { - struct { - __u64 va_pages; - __u64 commit_pages; - __u64 extent; - __u64 flags; - } in; - struct { - __u64 flags; - __u64 gpu_va; - } out; -}; - -#define KBASE_IOCTL_MEM_ALLOC \ - _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) - -/** - * struct kbase_ioctl_mem_query - Query properties of a GPU memory region - * @gpu_addr: A GPU address contained within the region - * @query: The type of query - * @value: The result of the query - * - * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_mem_query { - struct { - __u64 gpu_addr; - __u64 query; - } in; - struct { - __u64 value; - } out; -}; - -#define KBASE_IOCTL_MEM_QUERY \ - _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) - -#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1) -#define KBASE_MEM_QUERY_VA_SIZE ((u64)2) -#define KBASE_MEM_QUERY_FLAGS ((u64)3) - -/** - * struct kbase_ioctl_mem_free - Free a memory region - * @gpu_addr: Handle to the region to free - */ -struct kbase_ioctl_mem_free { - __u64 gpu_addr; -}; - -#define KBASE_IOCTL_MEM_FREE \ - _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) - -/** - * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader - * @buffer_count: requested number of dumping buffers - * @jm_bm: counters selection bitmask (JM) - * @shader_bm: counters selection bitmask (Shader) - * @tiler_bm: counters selection bitmask (Tiler) - * @mmu_l2_bm: counters selection bitmask (MMU_L2) - * - * A fd is returned from the ioctl if successful, or a negative value on error - */ -struct kbase_ioctl_hwcnt_reader_setup { - __u32 buffer_count; - __u32 jm_bm; - __u32 shader_bm; - __u32 tiler_bm; - __u32 mmu_l2_bm; -}; - -#define KBASE_IOCTL_HWCNT_READER_SETUP \ - _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) - -/** - * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection - * @dump_buffer: GPU address to write counters to - * @jm_bm: counters selection bitmask (JM) - * @shader_bm: counters selection bitmask (Shader) - * @tiler_bm: counters selection bitmask (Tiler) - * @mmu_l2_bm: counters selection bitmask (MMU_L2) - */ -struct kbase_ioctl_hwcnt_enable { - __u64 dump_buffer; - __u32 jm_bm; - __u32 shader_bm; - __u32 tiler_bm; - __u32 mmu_l2_bm; -}; - -#define KBASE_IOCTL_HWCNT_ENABLE \ - _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) - -#define KBASE_IOCTL_HWCNT_DUMP \ - _IO(KBASE_IOCTL_TYPE, 10) - -#define KBASE_IOCTL_HWCNT_CLEAR \ - _IO(KBASE_IOCTL_TYPE, 11) - -/** - * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. - * @data: Counter samples for the dummy model. - * @size: Size of the counter sample data. - * @padding: Padding. 
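For orientation, the ioctls above are driven from user space through an open kbase device file: the usual sequence is the version handshake, then setting the context creation flags, after which calls such as KBASE_IOCTL_HWCNT_READER_SETUP hand back a dedicated file descriptor. A minimal sketch of that sequence in C; the /dev/mali0 path and the buffer-count/bitmask choices are assumptions, not taken from this header:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "mali_kbase_ioctl.h"	/* uapi header shown above */

int main(void)
{
	struct kbase_ioctl_version_check vc = {
		.major = BASE_UK_VERSION_MAJOR,
		.minor = BASE_UK_VERSION_MINOR,
	};
	struct kbase_ioctl_set_flags sf = { .create_flags = 0 };
	struct kbase_ioctl_hwcnt_reader_setup setup = {
		.buffer_count = 16,		/* arbitrary power of two */
		.jm_bm = ~0u, .shader_bm = ~0u,
		.tiler_bm = ~0u, .mmu_l2_bm = ~0u,
	};
	int fd, hwcnt_fd;

	fd = open("/dev/mali0", O_RDWR);	/* typical node name, platform dependent */
	if (fd < 0)
		return 1;

	/* Agree on the user/kernel interface version first. */
	if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) < 0)
		goto out;
	printf("kbase interface %u.%u\n", (unsigned)vc.major, (unsigned)vc.minor);

	/* Complete context creation by setting the create flags. */
	if (ioctl(fd, KBASE_IOCTL_SET_FLAGS, &sf) < 0)
		goto out;

	/* On success this ioctl returns a new fd for the hwcnt dump stream. */
	hwcnt_fd = ioctl(fd, KBASE_IOCTL_HWCNT_READER_SETUP, &setup);
	if (hwcnt_fd >= 0)
		close(hwcnt_fd);
out:
	close(fd);
	return 0;
}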
- */ -struct kbase_ioctl_hwcnt_values { - __u64 data; - __u32 size; - __u32 padding; -}; - -#define KBASE_IOCTL_HWCNT_SET \ - _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) - -/** - * struct kbase_ioctl_disjoint_query - Query the disjoint counter - * @counter: A counter of disjoint events in the kernel - */ -struct kbase_ioctl_disjoint_query { - __u32 counter; -}; - -#define KBASE_IOCTL_DISJOINT_QUERY \ - _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) - -/** - * struct kbase_ioctl_get_ddk_version - Query the kernel version - * @version_buffer: Buffer to receive the kernel version string - * @size: Size of the buffer - * @padding: Padding - * - * The ioctl will return the number of bytes written into version_buffer - * (which includes a NULL byte) or a negative error code - * - * The ioctl request code has to be _IOW because the data in ioctl struct is - * being copied to the kernel, even though the kernel then writes out the - * version info to the buffer specified in the ioctl. - */ -struct kbase_ioctl_get_ddk_version { - __u64 version_buffer; - __u32 size; - __u32 padding; -}; - -#define KBASE_IOCTL_GET_DDK_VERSION \ - _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) - -/** - * struct kbase_ioctl_mem_jit_init_old - Initialise the JIT memory allocator - * - * @va_pages: Number of VA pages to reserve for JIT - * - * Note that depending on the VA size of the application and GPU, the value - * specified in @va_pages may be ignored. - * - * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for - * backwards compatibility. - */ -struct kbase_ioctl_mem_jit_init_old { - __u64 va_pages; -}; - -#define KBASE_IOCTL_MEM_JIT_INIT_OLD \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_old) - -/** - * struct kbase_ioctl_mem_jit_init - Initialise the JIT memory allocator - * - * @va_pages: Number of VA pages to reserve for JIT - * @max_allocations: Maximum number of concurrent allocations - * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) - * @padding: Currently unused, must be zero - * - * Note that depending on the VA size of the application and GPU, the value - * specified in @va_pages may be ignored. - */ -struct kbase_ioctl_mem_jit_init { - __u64 va_pages; - __u8 max_allocations; - __u8 trim_level; - __u8 padding[6]; -}; - -#define KBASE_IOCTL_MEM_JIT_INIT \ - _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) - -/** - * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory - * - * @handle: GPU memory handle (GPU VA) - * @user_addr: The address where it is mapped in user space - * @size: The number of bytes to synchronise - * @type: The direction to synchronise: 0 is sync to memory (clean), - * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants. 
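Several of the ioctls in this header, KBASE_IOCTL_MEM_ALLOC and KBASE_IOCTL_MEM_QUERY among them, use a union whose .in view is filled by the caller and whose .out view is valid after the call returns. A short sketch of that convention, assuming a context fd that has already completed the handshake; the page counts are arbitrary and mem_flags stands in for real BASE_MEM_* flags:

#include <stdio.h>
#include <sys/ioctl.h>

#include "mali_kbase_ioctl.h"

/* Allocate a small GPU buffer, then ask how many pages are committed. */
static int alloc_and_query(int fd, __u64 mem_flags)
{
	union kbase_ioctl_mem_alloc alloc = {
		.in = {
			.va_pages = 16,
			.commit_pages = 16,
			.extent = 0,
			.flags = mem_flags,	/* caller-supplied BASE_MEM_* flags */
		},
	};
	union kbase_ioctl_mem_query query;

	if (ioctl(fd, KBASE_IOCTL_MEM_ALLOC, &alloc) < 0)
		return -1;

	/* The same storage is now interpreted through the .out view. */
	query.in.gpu_addr = alloc.out.gpu_va;
	query.in.query = KBASE_MEM_QUERY_COMMIT_SIZE;
	if (ioctl(fd, KBASE_IOCTL_MEM_QUERY, &query) < 0)
		return -1;

	printf("gpu_va 0x%llx, %llu pages committed\n",
	       (unsigned long long)alloc.out.gpu_va,
	       (unsigned long long)query.out.value);
	return 0;
}

Because these are _IOWR ioctls the same bytes are copied into the kernel before the call and back out afterwards, which is why the input view should be treated as overwritten once the call succeeds.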
- * @padding: Padding to round up to a multiple of 8 bytes, must be zero - */ -struct kbase_ioctl_mem_sync { - __u64 handle; - __u64 user_addr; - __u64 size; - __u8 type; - __u8 padding[7]; -}; - -#define KBASE_IOCTL_MEM_SYNC \ - _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) - -/** - * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer - * - * @gpu_addr: The GPU address of the memory region - * @cpu_addr: The CPU address to locate - * @size: A size in bytes to validate is contained within the region - * @offset: The offset from the start of the memory region to @cpu_addr - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_mem_find_cpu_offset { - struct { - __u64 gpu_addr; - __u64 cpu_addr; - __u64 size; - } in; - struct { - __u64 offset; - } out; -}; - -#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ - _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) - -/** - * struct kbase_ioctl_get_context_id - Get the kernel context ID - * - * @id: The kernel context ID - */ -struct kbase_ioctl_get_context_id { - __u32 id; -}; - -#define KBASE_IOCTL_GET_CONTEXT_ID \ - _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) - -/** - * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd - * - * @flags: Flags - * - * The ioctl returns a file descriptor when successful - */ -struct kbase_ioctl_tlstream_acquire { - __u32 flags; -}; - -#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ - _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) - -#define KBASE_IOCTL_TLSTREAM_FLUSH \ - _IO(KBASE_IOCTL_TYPE, 19) - -/** - * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region - * - * @gpu_addr: The memory region to modify - * @pages: The number of physical pages that should be present - * - * The ioctl may return on the following error codes or 0 for success: - * -ENOMEM: Out of memory - * -EINVAL: Invalid arguments - */ -struct kbase_ioctl_mem_commit { - __u64 gpu_addr; - __u64 pages; -}; - -#define KBASE_IOCTL_MEM_COMMIT \ - _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) - -/** - * union kbase_ioctl_mem_alias - Create an alias of memory regions - * @flags: Flags, see BASE_MEM_xxx - * @stride: Bytes between start of each memory region - * @nents: The number of regions to pack together into the alias - * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info - * @gpu_va: Address of the new alias - * @va_pages: Size of the new alias - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_mem_alias { - struct { - __u64 flags; - __u64 stride; - __u64 nents; - __u64 aliasing_info; - } in; - struct { - __u64 flags; - __u64 gpu_va; - __u64 va_pages; - } out; -}; - -#define KBASE_IOCTL_MEM_ALIAS \ - _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) - -/** - * union kbase_ioctl_mem_import - Import memory for use by the GPU - * @flags: Flags, see BASE_MEM_xxx - * @phandle: Handle to the external memory - * @type: Type of external memory, see base_mem_import_type - * @padding: Amount of extra VA pages to append to the imported buffer - * @gpu_va: Address of the new alias - * @va_pages: Size of the new alias - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_mem_import { - struct { - __u64 flags; - __u64 phandle; - __u32 type; - __u32 padding; - } in; - struct { - __u64 flags; - __u64 gpu_va; - __u64 va_pages; - } out; -}; - -#define KBASE_IOCTL_MEM_IMPORT \ - _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) - -/** - * 
struct kbase_ioctl_mem_flags_change - Change the flags for a memory region - * @gpu_va: The GPU region to modify - * @flags: The new flags to set - * @mask: Mask of the flags to modify - */ -struct kbase_ioctl_mem_flags_change { - __u64 gpu_va; - __u64 flags; - __u64 mask; -}; - -#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ - _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) - -/** - * struct kbase_ioctl_stream_create - Create a synchronisation stream - * @name: A name to identify this stream. Must be NULL-terminated. - * - * Note that this is also called a "timeline", but is named stream to avoid - * confusion with other uses of the word. - * - * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes. - * - * The ioctl returns a file descriptor. - */ -struct kbase_ioctl_stream_create { - char name[32]; -}; - -#define KBASE_IOCTL_STREAM_CREATE \ - _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) - -/** - * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence - * @fd: The file descriptor to validate - */ -struct kbase_ioctl_fence_validate { - int fd; -}; - -#define KBASE_IOCTL_FENCE_VALIDATE \ - _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) - -/** - * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel - * @buffer: Pointer to the information - * @len: Length - * @padding: Padding - * - * The data provided is accessible through a debugfs file - */ -struct kbase_ioctl_mem_profile_add { - __u64 buffer; - __u32 len; - __u32 padding; -}; - -#define KBASE_IOCTL_MEM_PROFILE_ADD \ - _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) - -/** - * struct kbase_ioctl_soft_event_update - Update the status of a soft-event - * @event: GPU address of the event which has been updated - * @new_status: The new status to set - * @flags: Flags for future expansion - */ -struct kbase_ioctl_soft_event_update { - __u64 event; - __u32 new_status; - __u32 flags; -}; - -#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ - _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) - -/** - * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource - * @count: Number of resources - * @address: Array of u64 GPU addresses of the external resources to map - */ -struct kbase_ioctl_sticky_resource_map { - __u64 count; - __u64 address; -}; - -#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ - _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map) - -/** - * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was - * previously permanently mapped - * @count: Number of resources - * @address: Array of u64 GPU addresses of the external resources to unmap - */ -struct kbase_ioctl_sticky_resource_unmap { - __u64 count; - __u64 address; -}; - -#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ - _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap) - -/** - * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of - * the GPU memory region for - * the given gpu address and - * the offset of that address - * into the region - * - * @gpu_addr: GPU virtual address - * @size: Size in bytes within the region - * @start: Address of the beginning of the memory region enclosing @gpu_addr - * for the length of @offset bytes - * @offset: The offset from the start of the memory region to @gpu_addr - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_mem_find_gpu_start_and_offset { - struct { - __u64 gpu_addr; - __u64 size; - } in; - struct 
{ - __u64 start; - __u64 offset; - } out; -}; - -#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ - _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) - - -#define KBASE_IOCTL_CINSTR_GWT_START \ - _IO(KBASE_IOCTL_TYPE, 33) - -#define KBASE_IOCTL_CINSTR_GWT_STOP \ - _IO(KBASE_IOCTL_TYPE, 34) - -/** - * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. - * @addr_buffer: Address of buffer to hold addresses of gpu modified areas. - * @size_buffer: Address of buffer to hold size of modified areas (in pages) - * @len: Number of addresses the buffers can hold. - * @more_data_available: Status indicating if more addresses are available. - * @no_of_addr_collected: Number of addresses collected into addr_buffer. - * - * @in: Input parameters - * @out: Output parameters - * - * This structure is used when performing a call to dump GPU write fault - * addresses. - */ -union kbase_ioctl_cinstr_gwt_dump { - struct { - __u64 addr_buffer; - __u64 size_buffer; - __u32 len; - __u32 padding; - - } in; - struct { - __u32 no_of_addr_collected; - __u8 more_data_available; - __u8 padding[27]; - } out; -}; - -#define KBASE_IOCTL_CINSTR_GWT_DUMP \ - _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) - - -/** - * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone - * - * @va_pages: Number of VA pages to reserve for EXEC_VA - */ -struct kbase_ioctl_mem_exec_init { - __u64 va_pages; -}; - -#define KBASE_IOCTL_MEM_EXEC_INIT \ - _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) - - -/*************** - * test ioctls * - ***************/ -#if MALI_UNIT_TEST -/* These ioctls are purely for test purposes and are not used in the production - * driver, they therefore may change without notice - */ - -#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) - -/** - * struct kbase_ioctl_tlstream_test - Start a timeline stream test - * - * @tpw_count: number of trace point writers in each context - * @msg_delay: time delay between tracepoints from one writer in milliseconds - * @msg_count: number of trace points written by one writer - * @aux_msg: if non-zero aux messages will be included - */ -struct kbase_ioctl_tlstream_test { - __u32 tpw_count; - __u32 msg_delay; - __u32 msg_count; - __u32 aux_msg; -}; - -#define KBASE_IOCTL_TLSTREAM_TEST \ - _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) - -/** - * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes - * @bytes_collected: number of bytes read by user - * @bytes_generated: number of bytes generated by tracepoints - */ -struct kbase_ioctl_tlstream_stats { - __u32 bytes_collected; - __u32 bytes_generated; -}; - -#define KBASE_IOCTL_TLSTREAM_STATS \ - _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) - -/** - * struct kbase_ioctl_cs_event_memory_write - Write an event memory address - * @cpu_addr: Memory address to write - * @value: Value to write - * @padding: Currently unused, must be zero - */ -struct kbase_ioctl_cs_event_memory_write { - __u64 cpu_addr; - __u8 value; - __u8 padding[7]; -}; - -/** - * union kbase_ioctl_cs_event_memory_read - Read an event memory address - * @cpu_addr: Memory address to read - * @value: Value read - * @padding: Currently unused, must be zero - * - * @in: Input parameters - * @out: Output parameters - */ -union kbase_ioctl_cs_event_memory_read { - struct { - __u64 cpu_addr; - } in; - struct { - __u8 value; - __u8 padding[7]; - } out; -}; - -#endif - -/* Customer extension range */ -#define 
KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) - -/* If the integration needs extra ioctl add them there - * like this: - * - * struct my_ioctl_args { - * .... - * } - * - * #define KBASE_IOCTL_MY_IOCTL \ - * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) - */ - - -/********************************** - * Definitions for GPU properties * - **********************************/ -#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) -#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) -#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) -#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) - -#define KBASE_GPUPROP_PRODUCT_ID 1 -#define KBASE_GPUPROP_VERSION_STATUS 2 -#define KBASE_GPUPROP_MINOR_REVISION 3 -#define KBASE_GPUPROP_MAJOR_REVISION 4 -/* 5 previously used for GPU speed */ -#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 -/* 7 previously used for minimum GPU speed */ -#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 -#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 -#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 -#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 -#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 - -#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 -#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 -#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 - -#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 -#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 - -#define KBASE_GPUPROP_MAX_THREADS 18 -#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 -#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 -#define KBASE_GPUPROP_MAX_REGISTERS 21 -#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 -#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 -#define KBASE_GPUPROP_IMPL_TECH 24 - -#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 -#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 -#define KBASE_GPUPROP_RAW_L2_PRESENT 27 -#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 -#define KBASE_GPUPROP_RAW_L2_FEATURES 29 -#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 -#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 -#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 -#define KBASE_GPUPROP_RAW_AS_PRESENT 33 -#define KBASE_GPUPROP_RAW_JS_PRESENT 34 -#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 -#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 -#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 -#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 -#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 -#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 -#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 -#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 -#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 -#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 -#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 -#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 -#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 -#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 -#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 -#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 -#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 -#define KBASE_GPUPROP_RAW_GPU_ID 55 -#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 -#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 -#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 -#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 -#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 - -#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 -#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 -#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 -#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 -#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 -#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 -#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 
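Tying the property keys above back to the KBASE_IOCTL_GET_GPUPROPS description earlier in this header: the buffer is a tightly packed little-endian stream in which the two low bits of each u32 key carry one of the KBASE_GPUPROP_VALUE_SIZE_* codes. A decode sketch, under the assumption (not spelled out in the comment above) that the property id occupies the remaining upper bits of the key:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Walk a KBASE_IOCTL_GET_GPUPROPS buffer on a little-endian host. */
static void decode_gpuprops(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(uint32_t) <= len) {
		uint32_t key;
		uint64_t value = 0;
		size_t width;

		memcpy(&key, buf + off, sizeof(key));
		off += sizeof(key);

		switch (key & 0x3) {		/* KBASE_GPUPROP_VALUE_SIZE_* */
		case 0x0: width = 1; break;	/* u8  */
		case 0x1: width = 2; break;	/* u16 */
		case 0x2: width = 4; break;	/* u32 */
		default:  width = 8; break;	/* u64 */
		}
		if (off + width > len)
			break;
		memcpy(&value, buf + off, width);	/* value follows the key, no padding */
		off += width;

		/* Assumed: the KBASE_GPUPROP_* id sits above the two size bits. */
		printf("property %u = %llu\n", key >> 2, (unsigned long long)value);
	}
}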
-#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 -#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 -#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 -#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 -#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 -#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 -#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 -#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 -#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 -#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 -#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 -#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 - -#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 -#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 - -#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 - -#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 -#define KBASE_GPUPROP_TLS_ALLOC 84 - -#ifdef __cpluscplus -} -#endif - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd.c deleted file mode 100755 index 97d7b43104ff..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd.c +++ /dev/null @@ -1,1621 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#if defined(CONFIG_DMA_SHARED_BUFFER) -#include -#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -#ifdef CONFIG_COMPAT -#include -#endif -#include -#include -#include -#include - -#include -#include -#include - -#include "mali_kbase_dma_fence.h" - -#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -/* random32 was renamed to prandom_u32 in 3.8 */ -#define prandom_u32 random32 -#endif - -/* Return whether katom will run on the GPU or not. Currently only soft jobs and - * dependency-only atoms do not run on the GPU */ -#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ - ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ - BASE_JD_REQ_DEP))) -/* - * This is the kernel side of the API. Only entry points are: - * - kbase_jd_submit(): Called from userspace to submit a single bag - * - kbase_jd_done(): Called from interrupt context to track the - * completion of a job. - * Callouts: - * - to the job manager (enqueue a job) - * - to the event subsystem (signals the completion/failure of bag/job-chains). - */ - -static void __user * -get_compat_pointer(struct kbase_context *kctx, const u64 p) -{ -#ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - return compat_ptr(p); -#endif - return u64_to_user_ptr(p); -} - -/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs - * - * Returns whether the JS needs a reschedule. 
- * - * Note that the caller must also check the atom status and - * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock - */ -static int jd_run_atom(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); - - if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { - /* Dependency only atom */ - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - return 0; - } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - /* Soft-job */ - if (katom->will_fail_event_code) { - kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - return 0; - } - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - == BASE_JD_REQ_SOFT_REPLAY) { - if (!kbase_replay_process(katom)) - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - } else if (kbase_process_soft_job(katom) == 0) { - kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - } - return 0; - } - - katom->status = KBASE_JD_ATOM_STATE_IN_JS; - /* Queue an action about whether we should try scheduling a context */ - return kbasep_js_add_job(kctx, katom); -} - -#if defined(CONFIG_MALI_DMA_FENCE) -void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) -{ - struct kbase_device *kbdev; - - KBASE_DEBUG_ASSERT(katom); - kbdev = katom->kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev); - - /* Check whether the atom's other dependencies were already met. If - * katom is a GPU atom then the job scheduler may be able to represent - * the dependencies, hence we may attempt to submit it before they are - * met. Other atoms must have had both dependencies resolved. - */ - if (IS_GPU_ATOM(katom) || - (!kbase_jd_katom_dep_atom(&katom->dep[0]) && - !kbase_jd_katom_dep_atom(&katom->dep[1]))) { - /* katom dep complete, attempt to run it */ - bool resched = false; - - resched = jd_run_atom(katom); - - if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { - /* The atom has already finished */ - resched |= jd_done_nolock(katom, NULL); - } - - if (resched) - kbase_js_sched_all(kbdev); - } -} -#endif - -void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) -{ -#ifdef CONFIG_MALI_DMA_FENCE - /* Flush dma-fence workqueue to ensure that any callbacks that may have - * been queued are done before continuing. - * Any successfully completed atom would have had all it's callbacks - * completed before the atom was run, so only flush for failed atoms. - */ - if (katom->event_code != BASE_JD_EVENT_DONE) - flush_workqueue(katom->kctx->dma_fence.wq); -#endif /* CONFIG_MALI_DMA_FENCE */ -} - -static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) -{ - KBASE_DEBUG_ASSERT(katom); - KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); - -#ifdef CONFIG_MALI_DMA_FENCE - kbase_dma_fence_signal(katom); -#endif /* CONFIG_MALI_DMA_FENCE */ - - kbase_gpu_vm_lock(katom->kctx); - /* only roll back if extres is non-NULL */ - if (katom->extres) { - u32 res_no; - - res_no = katom->nr_extres; - while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, reg, alloc); - } - kfree(katom->extres); - katom->extres = NULL; - } - kbase_gpu_vm_unlock(katom->kctx); -} - -/* - * Set up external resources needed by this job. - * - * jctx.lock must be held when this is called. 
- */ - -static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) -{ - int err_ret_val = -EINVAL; - u32 res_no; -#ifdef CONFIG_MALI_DMA_FENCE - struct kbase_dma_fence_resv_info info = { - .dma_fence_resv_count = 0, - }; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - /* - * When both dma-buf fence and Android native sync is enabled, we - * disable dma-buf fence for contexts that are using Android native - * fences. - */ - const bool implicit_sync = !kbase_ctx_flag(katom->kctx, - KCTX_NO_IMPLICIT_SYNC); -#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/ - const bool implicit_sync = true; -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ -#endif /* CONFIG_MALI_DMA_FENCE */ - struct base_external_resource *input_extres; - - KBASE_DEBUG_ASSERT(katom); - KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); - - /* no resources encoded, early out */ - if (!katom->nr_extres) - return -EINVAL; - - katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); - if (NULL == katom->extres) { - err_ret_val = -ENOMEM; - goto early_err_out; - } - - /* copy user buffer to the end of our real buffer. - * Make sure the struct sizes haven't changed in a way - * we don't support */ - BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); - input_extres = (struct base_external_resource *) - (((unsigned char *)katom->extres) + - (sizeof(*katom->extres) - sizeof(*input_extres)) * - katom->nr_extres); - - if (copy_from_user(input_extres, - get_compat_pointer(katom->kctx, user_atom->extres_list), - sizeof(*input_extres) * katom->nr_extres) != 0) { - err_ret_val = -EINVAL; - goto early_err_out; - } - -#ifdef CONFIG_MALI_DMA_FENCE - if (implicit_sync) { - info.resv_objs = kmalloc_array(katom->nr_extres, - sizeof(struct reservation_object *), - GFP_KERNEL); - if (!info.resv_objs) { - err_ret_val = -ENOMEM; - goto early_err_out; - } - - info.dma_fence_excl_bitmap = - kcalloc(BITS_TO_LONGS(katom->nr_extres), - sizeof(unsigned long), GFP_KERNEL); - if (!info.dma_fence_excl_bitmap) { - err_ret_val = -ENOMEM; - goto early_err_out; - } - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - /* Take the processes mmap lock */ - down_read(¤t->mm->mmap_sem); - - /* need to keep the GPU VM locked while we set up UMM buffers */ - kbase_gpu_vm_lock(katom->kctx); - for (res_no = 0; res_no < katom->nr_extres; res_no++) { - struct base_external_resource *res; - struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; - bool exclusive; - - res = &input_extres[res_no]; - exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) - ? true : false; - reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, - res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - /* did we find a matching region object? 
*/ - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { - /* roll back */ - goto failed_loop; - } - - if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && - (reg->flags & KBASE_REG_SECURE)) { - katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; - } - - alloc = kbase_map_external_resource(katom->kctx, reg, - current->mm); - if (!alloc) { - err_ret_val = -EINVAL; - goto failed_loop; - } - -#ifdef CONFIG_MALI_DMA_FENCE - if (implicit_sync && - reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - struct reservation_object *resv; - - resv = reg->gpu_alloc->imported.umm.dma_buf->resv; - if (resv) - kbase_dma_fence_add_reservation(resv, &info, - exclusive); - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - /* finish with updating out array with the data we found */ - /* NOTE: It is important that this is the last thing we do (or - * at least not before the first write) as we overwrite elements - * as we loop and could be overwriting ourself, so no writes - * until the last read for an element. - * */ - katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = alloc; - } - /* successfully parsed the extres array */ - /* drop the vm lock now */ - kbase_gpu_vm_unlock(katom->kctx); - - /* Release the processes mmap lock */ - up_read(¤t->mm->mmap_sem); - -#ifdef CONFIG_MALI_DMA_FENCE - if (implicit_sync) { - if (info.dma_fence_resv_count) { - int ret; - - ret = kbase_dma_fence_wait(katom, &info); - if (ret < 0) - goto failed_dma_fence_setup; - } - - kfree(info.resv_objs); - kfree(info.dma_fence_excl_bitmap); - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - /* all done OK */ - return 0; - -/* error handling section */ - -#ifdef CONFIG_MALI_DMA_FENCE -failed_dma_fence_setup: - /* Lock the processes mmap lock */ - down_read(¤t->mm->mmap_sem); - - /* lock before we unmap */ - kbase_gpu_vm_lock(katom->kctx); -#endif - - failed_loop: - /* undo the loop work */ - while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - - kbase_unmap_external_resource(katom->kctx, NULL, alloc); - } - kbase_gpu_vm_unlock(katom->kctx); - - /* Release the processes mmap lock */ - up_read(¤t->mm->mmap_sem); - - early_err_out: - kfree(katom->extres); - katom->extres = NULL; -#ifdef CONFIG_MALI_DMA_FENCE - if (implicit_sync) { - kfree(info.resv_objs); - kfree(info.dma_fence_excl_bitmap); - } -#endif - return err_ret_val; -} - -static inline void jd_resolve_dep(struct list_head *out_list, - struct kbase_jd_atom *katom, - u8 d, bool ctx_is_dying) -{ - u8 other_d = !d; - - while (!list_empty(&katom->dep_head[d])) { - struct kbase_jd_atom *dep_atom; - struct kbase_jd_atom *other_dep_atom; - u8 dep_type; - - dep_atom = list_entry(katom->dep_head[d].next, - struct kbase_jd_atom, dep_item[d]); - list_del(katom->dep_head[d].next); - - dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); - kbase_jd_katom_dep_clear(&dep_atom->dep[d]); - - if (katom->event_code != BASE_JD_EVENT_DONE && - (dep_type != BASE_JD_DEP_TYPE_ORDER)) { -#ifdef CONFIG_MALI_DMA_FENCE - kbase_dma_fence_cancel_callbacks(dep_atom); -#endif - - dep_atom->event_code = katom->event_code; - KBASE_DEBUG_ASSERT(dep_atom->status != - KBASE_JD_ATOM_STATE_UNUSED); - - if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - != BASE_JD_REQ_SOFT_REPLAY) { - dep_atom->will_fail_event_code = - dep_atom->event_code; - } else { - dep_atom->status = - KBASE_JD_ATOM_STATE_COMPLETED; - } - } - other_dep_atom = (struct kbase_jd_atom *) - 
kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); - - if (!dep_atom->in_jd_list && (!other_dep_atom || - (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && - !dep_atom->will_fail_event_code && - !other_dep_atom->will_fail_event_code))) { - bool dep_satisfied = true; -#ifdef CONFIG_MALI_DMA_FENCE - int dep_count; - - dep_count = kbase_fence_dep_count_read(dep_atom); - if (likely(dep_count == -1)) { - dep_satisfied = true; - } else { - /* - * There are either still active callbacks, or - * all fences for this @dep_atom has signaled, - * but the worker that will queue the atom has - * not yet run. - * - * Wait for the fences to signal and the fence - * worker to run and handle @dep_atom. If - * @dep_atom was completed due to error on - * @katom, then the fence worker will pick up - * the complete status and error code set on - * @dep_atom above. - */ - dep_satisfied = false; - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - if (dep_satisfied) { - dep_atom->in_jd_list = true; - list_add_tail(&dep_atom->jd_item, out_list); - } - } - } -} - -KBASE_EXPORT_TEST_API(jd_resolve_dep); - -#if MALI_CUSTOMER_RELEASE == 0 -static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom) -{ - kbdev->force_replay_count++; - - if (kbdev->force_replay_count >= kbdev->force_replay_limit) { - kbdev->force_replay_count = 0; - katom->event_code = BASE_JD_EVENT_FORCE_REPLAY; - - if (kbdev->force_replay_random) - kbdev->force_replay_limit = - (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1; - - dev_info(kbdev->dev, "force_replay : promoting to error\n"); - } -} - -/** Test to see if atom should be forced to fail. - * - * This function will check if an atom has a replay job as a dependent. If so - * then it will be considered for forced failure. */ -static void jd_check_force_failure(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct kbase_device *kbdev = kctx->kbdev; - int i; - - if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) || - (katom->core_req & BASEP_JD_REQ_EVENT_NEVER)) - return; - - for (i = 1; i < BASE_JD_ATOM_COUNT; i++) { - if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom || - kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { - struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; - - if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == - BASE_JD_REQ_SOFT_REPLAY && - (dep_atom->core_req & kbdev->force_replay_core_req) - == kbdev->force_replay_core_req) { - jd_force_failure(kbdev, katom); - return; - } - } - } -} -#endif - -/** - * is_dep_valid - Validate that a dependency is valid for early dependency - * submission - * @katom: Dependency atom to validate - * - * A dependency is valid if any of the following are true : - * - It does not exist (a non-existent dependency does not block submission) - * - It is in the job scheduler - * - It has completed, does not have a failure event code, and has not been - * marked to fail in the future - * - * Return: true if valid, false otherwise - */ -static bool is_dep_valid(struct kbase_jd_atom *katom) -{ - /* If there's no dependency then this is 'valid' from the perspective of - * early dependency submission */ - if (!katom) - return true; - - /* Dependency must have reached the job scheduler */ - if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) - return false; - - /* If dependency has completed and has failed or will fail then it is - * not valid */ - if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && - (katom->event_code != BASE_JD_EVENT_DONE || - 
katom->will_fail_event_code)) - return false; - - return true; -} - -static void jd_try_submitting_deps(struct list_head *out_list, - struct kbase_jd_atom *node) -{ - int i; - - for (i = 0; i < 2; i++) { - struct list_head *pos; - - list_for_each(pos, &node->dep_head[i]) { - struct kbase_jd_atom *dep_atom = list_entry(pos, - struct kbase_jd_atom, dep_item[i]); - - if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { - /*Check if atom deps look sane*/ - bool dep0_valid = is_dep_valid( - dep_atom->dep[0].atom); - bool dep1_valid = is_dep_valid( - dep_atom->dep[1].atom); - bool dep_satisfied = true; -#ifdef CONFIG_MALI_DMA_FENCE - int dep_count; - - dep_count = kbase_fence_dep_count_read( - dep_atom); - if (likely(dep_count == -1)) { - dep_satisfied = true; - } else { - /* - * There are either still active callbacks, or - * all fences for this @dep_atom has signaled, - * but the worker that will queue the atom has - * not yet run. - * - * Wait for the fences to signal and the fence - * worker to run and handle @dep_atom. If - * @dep_atom was completed due to error on - * @katom, then the fence worker will pick up - * the complete status and error code set on - * @dep_atom above. - */ - dep_satisfied = false; - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - if (dep0_valid && dep1_valid && dep_satisfied) { - dep_atom->in_jd_list = true; - list_add(&dep_atom->jd_item, out_list); - } - } - } - } -} - -/* - * Perform the necessary handling of an atom that has finished running - * on the GPU. - * - * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller - * is responsible for calling kbase_finish_soft_job *before* calling this function. - * - * The caller must hold the kbase_jd_context.lock. - */ -bool jd_done_nolock(struct kbase_jd_atom *katom, - struct list_head *completed_jobs_ctx) -{ - struct kbase_context *kctx = katom->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct list_head completed_jobs; - struct list_head runnable_jobs; - bool need_to_try_schedule_context = false; - int i; - - INIT_LIST_HEAD(&completed_jobs); - INIT_LIST_HEAD(&runnable_jobs); - - KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); - -#if MALI_CUSTOMER_RELEASE == 0 - jd_check_force_failure(katom); -#endif - - /* This is needed in case an atom is failed due to being invalid, this - * can happen *before* the jobs that the atom depends on have completed */ - for (i = 0; i < 2; i++) { - if (kbase_jd_katom_dep_atom(&katom->dep[i])) { - list_del(&katom->dep_item[i]); - kbase_jd_katom_dep_clear(&katom->dep[i]); - } - } - - /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with - * BASE_JD_EVENT_TILE_RANGE_FAULT. 
- * - * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the - * error code to BASE_JD_EVENT_DONE - */ - - if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) && - katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) { - if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) { - /* Promote the failure to job done */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED); - } - } - - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&katom->jd_item, &completed_jobs); - - while (!list_empty(&completed_jobs)) { - katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); - list_del(completed_jobs.prev); - KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); - - for (i = 0; i < 2; i++) - jd_resolve_dep(&runnable_jobs, katom, i, - kbase_ctx_flag(kctx, KCTX_DYING)); - - if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) - kbase_jd_post_external_resources(katom); - - while (!list_empty(&runnable_jobs)) { - struct kbase_jd_atom *node; - - node = list_entry(runnable_jobs.next, - struct kbase_jd_atom, jd_item); - list_del(runnable_jobs.next); - node->in_jd_list = false; - - KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); - - if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && - !kbase_ctx_flag(kctx, KCTX_DYING)) { - need_to_try_schedule_context |= jd_run_atom(node); - } else { - node->event_code = katom->event_code; - - if ((node->core_req & - BASE_JD_REQ_SOFT_JOB_TYPE) == - BASE_JD_REQ_SOFT_REPLAY) { - if (kbase_replay_process(node)) - /* Don't complete this atom */ - continue; - } else if (node->core_req & - BASE_JD_REQ_SOFT_JOB) { - WARN_ON(!list_empty(&node->queue)); - kbase_finish_soft_job(node); - } - node->status = KBASE_JD_ATOM_STATE_COMPLETED; - } - - if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { - list_add_tail(&node->jd_item, &completed_jobs); - } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && - !node->will_fail_event_code) { - /* Node successfully submitted, try submitting - * dependencies as they may now be representable - * in JS */ - jd_try_submitting_deps(&runnable_jobs, node); - } - } - - /* Register a completed job as a disjoint event when the GPU - * is in a disjoint state (ie. being reset or replaying jobs). - */ - kbase_disjoint_event_potential(kctx->kbdev); - if (completed_jobs_ctx) - list_add_tail(&katom->jd_item, completed_jobs_ctx); - else - kbase_event_post(kctx, katom); - - /* Decrement and check the TOTAL number of jobs. This includes - * those not tracked by the scheduler: 'not ready to run' and - * 'dependency-only' jobs. 
*/ - if (--kctx->jctx.job_nr == 0) - wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter - * that we've got no more jobs (so we can be safely terminated) */ - } - - return need_to_try_schedule_context; -} - -KBASE_EXPORT_TEST_API(jd_done_nolock); - -#ifdef CONFIG_GPU_TRACEPOINTS -enum { - CORE_REQ_DEP_ONLY, - CORE_REQ_SOFT, - CORE_REQ_COMPUTE, - CORE_REQ_FRAGMENT, - CORE_REQ_VERTEX, - CORE_REQ_TILER, - CORE_REQ_FRAGMENT_VERTEX, - CORE_REQ_FRAGMENT_VERTEX_TILER, - CORE_REQ_FRAGMENT_TILER, - CORE_REQ_VERTEX_TILER, - CORE_REQ_UNKNOWN -}; -static const char * const core_req_strings[] = { - "Dependency Only Job", - "Soft Job", - "Compute Shader Job", - "Fragment Shader Job", - "Vertex/Geometry Shader Job", - "Tiler Job", - "Fragment Shader + Vertex/Geometry Shader Job", - "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", - "Fragment Shader + Tiler Job", - "Vertex/Geometry Shader Job + Tiler Job", - "Unknown Job" -}; -static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) -{ - if (core_req & BASE_JD_REQ_SOFT_JOB) - return core_req_strings[CORE_REQ_SOFT]; - if (core_req & BASE_JD_REQ_ONLY_COMPUTE) - return core_req_strings[CORE_REQ_COMPUTE]; - switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { - case BASE_JD_REQ_DEP: - return core_req_strings[CORE_REQ_DEP_ONLY]; - case BASE_JD_REQ_FS: - return core_req_strings[CORE_REQ_FRAGMENT]; - case BASE_JD_REQ_CS: - return core_req_strings[CORE_REQ_VERTEX]; - case BASE_JD_REQ_T: - return core_req_strings[CORE_REQ_TILER]; - case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): - return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; - case (BASE_JD_REQ_FS | BASE_JD_REQ_T): - return core_req_strings[CORE_REQ_FRAGMENT_TILER]; - case (BASE_JD_REQ_CS | BASE_JD_REQ_T): - return core_req_strings[CORE_REQ_VERTEX_TILER]; - case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): - return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; - } - return core_req_strings[CORE_REQ_UNKNOWN]; -} -#endif - -bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) -{ - struct kbase_jd_context *jctx = &kctx->jctx; - int queued = 0; - int i; - int sched_prio; - bool ret; - bool will_fail = false; - - /* Update the TOTAL number of jobs. This includes those not tracked by - * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ - jctx->job_nr++; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) - katom->start_timestamp.tv64 = 0; -#else - katom->start_timestamp = 0; -#endif - katom->udata = user_atom->udata; - katom->kctx = kctx; - katom->nr_extres = user_atom->nr_extres; - katom->extres = NULL; - katom->device_nr = user_atom->device_nr; - katom->jc = user_atom->jc; - katom->core_req = user_atom->core_req; - katom->atom_flags = 0; - katom->retry_count = 0; - katom->need_cache_flush_cores_retained = 0; - katom->pre_dep = NULL; - katom->post_dep = NULL; - katom->x_pre_dep = NULL; - katom->x_post_dep = NULL; - katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; - katom->softjob_data = NULL; - - /* Implicitly sets katom->protected_state.enter as well. */ - katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; - - katom->age = kctx->age_count++; - - INIT_LIST_HEAD(&katom->queue); - INIT_LIST_HEAD(&katom->jd_item); -#ifdef CONFIG_MALI_DMA_FENCE - kbase_fence_dep_count_set(katom, -1); -#endif - - /* Don't do anything if there is a mess up with dependencies. 
- This is done in a separate cycle to check both the dependencies at ones, otherwise - it will be extra complexity to deal with 1st dependency ( just added to the list ) - if only the 2nd one has invalid config. - */ - for (i = 0; i < 2; i++) { - int dep_atom_number = user_atom->pre_dep[i].atom_id; - base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; - - if (dep_atom_number) { - if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && - dep_atom_type != BASE_JD_DEP_TYPE_DATA) { - katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - - /* Wrong dependency setup. Atom will be sent - * back to user space. Do not record any - * dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX( - katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, - TL_ATOM_STATE_IDLE); - - ret = jd_done_nolock(katom, NULL); - goto out; - } - } - } - - /* Add dependencies */ - for (i = 0; i < 2; i++) { - int dep_atom_number = user_atom->pre_dep[i].atom_id; - base_jd_dep_type dep_atom_type; - struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; - - dep_atom_type = user_atom->pre_dep[i].dependency_type; - kbase_jd_katom_dep_clear(&katom->dep[i]); - - if (!dep_atom_number) - continue; - - if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || - dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { - - if (dep_atom->event_code == BASE_JD_EVENT_DONE) - continue; - /* don't stop this atom if it has an order dependency - * only to the failed one, try to submit it through - * the normal path - */ - if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && - dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { - continue; - } - - /* Atom has completed, propagate the error code if any */ - katom->event_code = dep_atom->event_code; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; - - /* This atom is going through soft replay or - * will be sent back to user space. Do not record any - * dependencies. 
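The dependency checks in this submit path operate on the two pre_dep slots that user space fills in per atom. A sketch of how two atoms could be chained with a data dependency, setting only the base_jd_atom_v2 fields this path reads; the struct and the BASE_JD_* constants come from the base kernel headers (assumed here to be mali_base_kernel.h), not from this file:

#include <string.h>

#include "mali_base_kernel.h"	/* assumed location of base_jd_atom_v2 */

/* Fill two atoms so that atom 2 only runs once atom 1's output exists. */
static void chain_two_atoms(struct base_jd_atom_v2 atoms[2],
			    __u64 first_jc, __u64 second_jc,
			    base_jd_core_req req)
{
	memset(atoms, 0, 2 * sizeof(atoms[0]));

	atoms[0].atom_number = 1;
	atoms[0].jc = first_jc;
	atoms[0].core_req = req;

	atoms[1].atom_number = 2;
	atoms[1].jc = second_jc;
	atoms[1].core_req = req;
	/* A non-zero atom_id with a valid type is what jd_submit_atom accepts. */
	atoms[1].pre_dep[0].atom_id = 1;
	atoms[1].pre_dep[0].dependency_type = BASE_JD_DEP_TYPE_DATA;
}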
*/ - KBASE_TLSTREAM_TL_NEW_ATOM( - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, - TL_ATOM_STATE_IDLE); - - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - == BASE_JD_REQ_SOFT_REPLAY) { - if (kbase_replay_process(katom)) { - ret = false; - goto out; - } - } - will_fail = true; - - } else { - /* Atom is in progress, add this atom to the list */ - list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); - kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); - queued = 1; - } - } - - if (will_fail) { - if (!queued) { - if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - /* This softjob has failed due to a previous - * dependency, however we should still run the - * prepare & finish functions - */ - int err = kbase_prepare_soft_job(katom); - - if (err >= 0) - kbase_finish_soft_job(katom); - } - - ret = jd_done_nolock(katom, NULL); - - goto out; - } else { - - if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - /* This softjob has failed due to a previous - * dependency, however we should still run the - * prepare & finish functions - */ - if (kbase_prepare_soft_job(katom) != 0) { - katom->event_code = - BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - } - - katom->will_fail_event_code = katom->event_code; - ret = false; - - goto out; - } - } else { - /* These must occur after the above loop to ensure that an atom - * that depends on a previous atom with the same number behaves - * as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; - } - - /* For invalid priority, be most lenient and choose the default */ - sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); - if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) - sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; - katom->sched_priority = sched_prio; - - /* Create a new atom. 
*/ - KBASE_TLSTREAM_TL_NEW_ATOM( - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx); - - /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ - if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { - dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - - /* Reject atoms with an invalid device_nr */ - if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && - (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid device_nr %d", - katom->device_nr); - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - - /* Reject atoms with invalid core requirements */ - if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && - (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid core requirements"); - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; - ret = jd_done_nolock(katom, NULL); - goto out; - } - - /* Reject soft-job atom of certain types from accessing external resources */ - if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && - (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || - ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || - ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { - dev_warn(kctx->kbdev->dev, - "Rejecting soft-job atom accessing external resources"); - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - - if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - /* handle what we need to do to access the external resources */ - if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { - /* setup failed (no access, bad resource, unknown resource types, etc.) */ - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - } - - /* Validate the atom. Function will return error if the atom is - * malformed. - * - * Soft-jobs never enter the job scheduler but have their own initialize method. - * - * If either fail then we immediately complete the atom with an error. 
- */ - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { - if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - } else { - /* Soft-job */ - if (kbase_prepare_soft_job(katom) != 0) { - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - ret = jd_done_nolock(katom, NULL); - goto out; - } - } - -#ifdef CONFIG_GPU_TRACEPOINTS - katom->work_id = atomic_inc_return(&jctx->work_id); - trace_gpu_job_enqueue(kctx->id, katom->work_id, - kbasep_map_core_reqs_to_string(katom->core_req)); -#endif - - if (queued && !IS_GPU_ATOM(katom)) { - ret = false; - goto out; - } - -#ifdef CONFIG_MALI_DMA_FENCE - if (kbase_fence_dep_count_read(katom) != -1) { - ret = false; - goto out; - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - == BASE_JD_REQ_SOFT_REPLAY) { - if (kbase_replay_process(katom)) - ret = false; - else - ret = jd_done_nolock(katom, NULL); - - goto out; - } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - if (kbase_process_soft_job(katom) == 0) { - kbase_finish_soft_job(katom); - ret = jd_done_nolock(katom, NULL); - goto out; - } - - ret = false; - } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { - katom->status = KBASE_JD_ATOM_STATE_IN_JS; - ret = kbasep_js_add_job(kctx, katom); - /* If job was cancelled then resolve immediately */ - if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) - ret = jd_done_nolock(katom, NULL); - } else { - /* This is a pure dependency. Resolve it immediately */ - ret = jd_done_nolock(katom, NULL); - } - - out: - return ret; -} - -int kbase_jd_submit(struct kbase_context *kctx, - void __user *user_addr, u32 nr_atoms, u32 stride, - bool uk6_atom) -{ - struct kbase_jd_context *jctx = &kctx->jctx; - int err = 0; - int i; - bool need_to_try_schedule_context = false; - struct kbase_device *kbdev; - u32 latest_flush; - - /* - * kbase_jd_submit isn't expected to fail and so all errors with the - * jobs are reported by immediately failing them (through event system) - */ - kbdev = kctx->kbdev; - - beenthere(kctx, "%s", "Enter"); - - if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); - return -EINVAL; - } - - if (stride != sizeof(base_jd_atom_v2)) { - dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); - return -EINVAL; - } - - /* All atoms submitted in this call have the same flush ID */ - latest_flush = kbase_backend_get_current_flush_id(kbdev); - - for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom_v2 user_atom; - struct kbase_jd_atom *katom; - - if (copy_from_user(&user_atom, user_addr, - sizeof(user_atom)) != 0) { - err = -EINVAL; - break; - } - - user_addr = (void __user *)((uintptr_t) user_addr + stride); - - mutex_lock(&jctx->lock); -#ifndef compiletime_assert -#define compiletime_assert_defined -#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ -while (false) -#endif - compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) == - BASE_JD_ATOM_COUNT, - "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); - compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == - sizeof(user_atom.atom_number), - "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); -#ifdef compiletime_assert_defined -#undef compiletime_assert -#undef compiletime_assert_defined -#endif - katom = &jctx->atoms[user_atom.atom_number]; - - /* Record the flush ID for the cache 
flush optimisation */ - katom->flush_id = latest_flush; - - while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { - /* Atom number is already in use, wait for the atom to - * complete - */ - mutex_unlock(&jctx->lock); - - /* This thread will wait for the atom to complete. Due - * to thread scheduling we are not sure that the other - * thread that owns the atom will also schedule the - * context, so we force the scheduler to be active and - * hence eventually schedule this context at some point - * later. - */ - kbase_js_sched_all(kbdev); - - if (wait_event_killable(katom->completed, - katom->status == - KBASE_JD_ATOM_STATE_UNUSED) != 0) { - /* We're being killed so the result code - * doesn't really matter - */ - return 0; - } - mutex_lock(&jctx->lock); - } - - need_to_try_schedule_context |= - jd_submit_atom(kctx, &user_atom, katom); - - /* Register a completed job as a disjoint event when the GPU is in a disjoint state - * (ie. being reset or replaying jobs). - */ - kbase_disjoint_event_potential(kbdev); - - mutex_unlock(&jctx->lock); - } - - if (need_to_try_schedule_context) - kbase_js_sched_all(kbdev); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_jd_submit); - -void kbase_jd_done_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); - struct kbase_jd_context *jctx; - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; - u64 cache_jc = katom->jc; - struct kbasep_js_atom_retained_state katom_retained_state; - bool context_idle; - base_jd_core_req core_req = katom->core_req; - - /* Soft jobs should never reach this function */ - KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); - - kctx = katom->kctx; - jctx = &kctx->jctx; - kbdev = kctx->kbdev; - js_kctx_info = &kctx->jctx.sched_info; - js_devdata = &kbdev->js_data; - - KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); - - kbase_backend_complete_wq(kbdev, katom); - - /* - * Begin transaction on JD context and JS context - */ - mutex_lock(&jctx->lock); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE); - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - - /* This worker only gets called on contexts that are scheduled *in*. This is - * because it only happens in response to an IRQ from a job that was - * running. 
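kbase_jd_submit() above is reached through KBASE_IOCTL_JOB_SUBMIT and insists that the stride passed from user space matches sizeof(struct base_jd_atom_v2), returning -EINVAL otherwise. A companion sketch of the call, assuming a context fd that has completed the handshake:

#include <stdint.h>
#include <sys/ioctl.h>

#include "mali_kbase_ioctl.h"

static int submit_atoms(int fd, const struct base_jd_atom_v2 *atoms,
			__u32 nr_atoms)
{
	struct kbase_ioctl_job_submit js = {
		.addr = (__u64)(uintptr_t)atoms,
		.nr_atoms = nr_atoms,
		/* Must match the kernel's struct size or the submit is rejected. */
		.stride = sizeof(struct base_jd_atom_v2),
	};

	return ioctl(fd, KBASE_IOCTL_JOB_SUBMIT, &js);
}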
- */ - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - if (katom->event_code == BASE_JD_EVENT_STOPPED) { - /* Atom has been promoted to stopped */ - unsigned long flags; - - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - katom->status = KBASE_JD_ATOM_STATE_IN_JS; - kbase_js_unpull(kctx, katom); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&jctx->lock); - - return; - } - - if ((katom->event_code != BASE_JD_EVENT_DONE) && - (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) - dev_err(kbdev->dev, - "t6xx: GPU fault 0x%02lx from job slot %d\n", - (unsigned long)katom->event_code, - katom->slot_nr); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) - kbase_as_poking_timer_release_atom(kbdev, kctx, katom); - - /* Retain state before the katom disappears */ - kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); - - context_idle = kbase_js_complete_atom_wq(kctx, katom); - - KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); - - kbasep_js_remove_job(kbdev, kctx, katom); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; - /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ - jd_done_nolock(katom, &kctx->completed_jobs); - - /* katom may have been freed now, do not use! */ - - if (context_idle) { - unsigned long flags; - - context_idle = false; - mutex_lock(&js_devdata->queue_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* If kbase_sched() has scheduled this context back in then - * KCTX_ACTIVE will have been set after we marked it as - * inactive, and another pm reference will have been taken, so - * drop our reference. But do not call kbase_jm_idle_ctx(), as - * the context is active and fast-starting is allowed. - * - * If an atom has been fast-started then kctx->atoms_pulled will - * be non-zero but KCTX_ACTIVE will still be false (as the - * previous pm reference has been inherited). Do NOT drop our - * reference, as it has been re-used, and leave the context as - * active. - * - * If no new atoms have been started then KCTX_ACTIVE will still - * be false and atoms_pulled will be zero, so drop the reference - * and call kbase_jm_idle_ctx(). - * - * As the checks are done under both the queue_mutex and - * hwaccess_lock is should be impossible for this to race - * with the scheduler code. - */ - if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || - !atomic_read(&kctx->atoms_pulled)) { - /* Calling kbase_jm_idle_ctx() here will ensure that - * atoms are not fast-started when we drop the - * hwaccess_lock. This is not performed if - * KCTX_ACTIVE is set as in that case another pm - * reference has been taken and a fast-start would be - * valid. 
- */ - if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) - kbase_jm_idle_ctx(kbdev, kctx); - context_idle = true; - } else { - kbase_ctx_flag_set(kctx, KCTX_ACTIVE); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&js_devdata->queue_mutex); - } - - /* - * Transaction complete - */ - mutex_unlock(&jctx->lock); - - /* Job is now no longer running, so can now safely release the context - * reference, and handle any actions that were logged against the atom's retained state */ - - kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); - - kbase_js_sched_all(kbdev); - - if (!atomic_dec_return(&kctx->work_count)) { - /* If worker now idle then post all events that jd_done_nolock() - * has queued */ - mutex_lock(&jctx->lock); - while (!list_empty(&kctx->completed_jobs)) { - struct kbase_jd_atom *atom = list_entry( - kctx->completed_jobs.next, - struct kbase_jd_atom, jd_item); - list_del(kctx->completed_jobs.next); - - kbase_event_post(kctx, atom); - } - mutex_unlock(&jctx->lock); - } - - kbase_backend_complete_wq_post_sched(kbdev, core_req); - - if (context_idle) - kbase_pm_context_idle(kbdev); - - KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); -} - -/** - * jd_cancel_worker - Work queue job cancel function. - * @data: a &struct work_struct - * - * Only called as part of 'Zapping' a context (which occurs on termination). - * Operates serially with the kbase_jd_done_worker() on the work queue. - * - * This can only be called on contexts that aren't scheduled. - * - * We don't need to release most of the resources that would occur on - * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be - * running (by virtue of only being called on contexts that aren't - * scheduled). - */ -static void jd_cancel_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); - struct kbase_jd_context *jctx; - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - bool need_to_try_schedule_context; - bool attr_state_changed; - struct kbase_device *kbdev; - - /* Soft jobs should never reach this function */ - KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); - - kctx = katom->kctx; - kbdev = kctx->kbdev; - jctx = &kctx->jctx; - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); - - /* This only gets called on contexts that are scheduled out. Hence, we must - * make sure we don't de-ref the number of running jobs (there aren't - * any), nor must we try to schedule out the context (it's already - * scheduled out). - */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - /* Scheduler: Remove the job from the system */ - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - mutex_lock(&jctx->lock); - - need_to_try_schedule_context = jd_done_nolock(katom, NULL); - /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to - * schedule the context. There's also no need for the jsctx_mutex to have been taken - * around this too. */ - KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); - - /* katom may have been freed now, do not use! 
*/ - mutex_unlock(&jctx->lock); - - if (attr_state_changed) - kbase_js_sched_all(kbdev); -} - -/** - * kbase_jd_done - Complete a job that has been removed from the Hardware - * @katom: atom which has been completed - * @slot_nr: slot the atom was on - * @end_timestamp: completion time - * @done_code: completion code - * - * This must be used whenever a job has been removed from the Hardware, e.g.: - * An IRQ indicates that the job finished (for both error and 'done' codes), or - * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. - * - * Some work is carried out immediately, and the rest is deferred onto a - * workqueue - * - * Context: - * This can be called safely from atomic context. - * The caller must hold kbdev->hwaccess_lock - */ -void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, - ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) -{ - struct kbase_context *kctx; - struct kbase_device *kbdev; - - KBASE_DEBUG_ASSERT(katom); - kctx = katom->kctx; - KBASE_DEBUG_ASSERT(kctx); - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev); - - if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) - katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - - KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0); - - kbase_job_check_leave_disjoint(kbdev, katom); - - katom->slot_nr = slot_nr; - - atomic_inc(&kctx->work_count); - -#ifdef CONFIG_DEBUG_FS - /* a failed job happened and is waiting for dumping*/ - if (!katom->will_fail_event_code && - kbase_debug_job_fault_process(katom, katom->event_code)) - return; -#endif - - WARN_ON(work_pending(&katom->work)); - INIT_WORK(&katom->work, kbase_jd_done_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} - -KBASE_EXPORT_TEST_API(kbase_jd_done); - -void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != katom); - kctx = katom->kctx; - KBASE_DEBUG_ASSERT(NULL != kctx); - - KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); - - /* This should only be done from a context that is not scheduled */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - WARN_ON(work_pending(&katom->work)); - - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - INIT_WORK(&katom->work, jd_cancel_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} - - -void kbase_jd_zap_context(struct kbase_context *kctx) -{ - struct kbase_jd_atom *katom; - struct list_head *entry, *tmp; - struct kbase_device *kbdev; - - KBASE_DEBUG_ASSERT(kctx); - - kbdev = kctx->kbdev; - - KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); - - kbase_js_zap_context(kctx); - - mutex_lock(&kctx->jctx.lock); - - /* - * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are - * queued outside the job scheduler. - */ - - del_timer_sync(&kctx->soft_job_timeout); - list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - katom = list_entry(entry, struct kbase_jd_atom, queue); - kbase_cancel_soft_job(katom); - } - - -#ifdef CONFIG_MALI_DMA_FENCE - kbase_dma_fence_cancel_all_atoms(kctx); -#endif - - mutex_unlock(&kctx->jctx.lock); - -#ifdef CONFIG_MALI_DMA_FENCE - /* Flush dma-fence workqueue to ensure that any callbacks that may have - * been queued are done before continuing. 
- */ - flush_workqueue(kctx->dma_fence.wq); -#endif - -#ifdef CONFIG_DEBUG_FS - kbase_debug_job_fault_kctx_unblock(kctx); -#endif - - kbase_jm_wait_for_zero_jobs(kctx); -} - -KBASE_EXPORT_TEST_API(kbase_jd_zap_context); - -int kbase_jd_init(struct kbase_context *kctx) -{ - int i; - int mali_err = 0; - - KBASE_DEBUG_ASSERT(kctx); - - kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", - WQ_HIGHPRI | WQ_UNBOUND, 1); - if (NULL == kctx->jctx.job_done_wq) { - mali_err = -ENOMEM; - goto out1; - } - - for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { - init_waitqueue_head(&kctx->jctx.atoms[i].completed); - - INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); - INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); - - /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ - kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; - kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; - -#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - kctx->jctx.atoms[i].dma_fence.context = - dma_fence_context_alloc(1); - atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); - INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); -#endif - } - - mutex_init(&kctx->jctx.lock); - - init_waitqueue_head(&kctx->jctx.zero_jobs_wait); - - spin_lock_init(&kctx->jctx.tb_lock); - - kctx->jctx.job_nr = 0; - INIT_LIST_HEAD(&kctx->completed_jobs); - atomic_set(&kctx->work_count, 0); - - return 0; - - out1: - return mali_err; -} - -KBASE_EXPORT_TEST_API(kbase_jd_init); - -void kbase_jd_exit(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(kctx); - - /* Work queue is emptied by this */ - destroy_workqueue(kctx->jctx.job_done_wq); -} - -KBASE_EXPORT_TEST_API(kbase_jd_exit); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd_debugfs.c deleted file mode 100755 index 7b15d8a05bfd..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd_debugfs.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifdef CONFIG_DEBUG_FS - -#include -#include -#include -#include -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -#include -#endif -#include - -struct kbase_jd_debugfs_depinfo { - u8 id; - char type; -}; - -static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, - struct seq_file *sfile) -{ -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - struct kbase_sync_fence_info info; - int res; - - switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_FENCE_TRIGGER: - res = kbase_sync_fence_out_info_get(atom, &info); - if (0 == res) { - seq_printf(sfile, "Sa([%p]%d) ", - info.fence, info.status); - break; - } - case BASE_JD_REQ_SOFT_FENCE_WAIT: - res = kbase_sync_fence_in_info_get(atom, &info); - if (0 == res) { - seq_printf(sfile, "Wa([%p]%d) ", - info.fence, info.status); - break; - } - default: - break; - } -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ - -#ifdef CONFIG_MALI_DMA_FENCE - if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - struct kbase_fence_cb *cb; - - if (atom->dma_fence.fence) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence = atom->dma_fence.fence; -#else - struct dma_fence *fence = atom->dma_fence.fence; -#endif - - seq_printf(sfile, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - "Sd(%u#%u: %s) ", -#else - "Sd(%llu#%u: %s) ", -#endif - fence->context, - fence->seqno, - dma_fence_is_signaled(fence) ? - "signaled" : "active"); - } - - list_for_each_entry(cb, &atom->dma_fence.callbacks, - node) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence = cb->fence; -#else - struct dma_fence *fence = cb->fence; -#endif - - seq_printf(sfile, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - "Wd(%u#%u: %s) ", -#else - "Wd(%llu#%u: %s) ", -#endif - fence->context, - fence->seqno, - dma_fence_is_signaled(fence) ? - "signaled" : "active"); - } - } -#endif /* CONFIG_MALI_DMA_FENCE */ - -} - -static void kbasep_jd_debugfs_atom_deps( - struct kbase_jd_debugfs_depinfo *deps, - struct kbase_jd_atom *atom) -{ - struct kbase_context *kctx = atom->kctx; - int i; - - for (i = 0; i < 2; i++) { - deps[i].id = (unsigned)(atom->dep[i].atom ? - kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); - - switch (atom->dep[i].dep_type) { - case BASE_JD_DEP_TYPE_INVALID: - deps[i].type = ' '; - break; - case BASE_JD_DEP_TYPE_DATA: - deps[i].type = 'D'; - break; - case BASE_JD_DEP_TYPE_ORDER: - deps[i].type = '>'; - break; - default: - deps[i].type = '?'; - break; - } - } -} -/** - * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. - * @sfile: The debugfs entry - * @data: Data associated with the entry - * - * This function is called to get the contents of the JD atoms debugfs file. 
- * This is a report of all atoms managed by kbase_jd_context.atoms - * - * Return: 0 if successfully prints data in debugfs entry file, failure - * otherwise - */ -static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) -{ - struct kbase_context *kctx = sfile->private; - struct kbase_jd_atom *atoms; - unsigned long irq_flags; - int i; - - KBASE_DEBUG_ASSERT(kctx != NULL); - - /* Print version */ - seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); - - /* Print U/K API version */ - seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, - BASE_UK_VERSION_MINOR); - - /* Print table heading */ - seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n"); - - atoms = kctx->jctx.atoms; - /* General atom states */ - mutex_lock(&kctx->jctx.lock); - /* JS-related states */ - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); - for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { - struct kbase_jd_atom *atom = &atoms[i]; - s64 start_timestamp = 0; - struct kbase_jd_debugfs_depinfo deps[2]; - - if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) - continue; - - /* start_timestamp is cleared as soon as the atom leaves UNUSED state - * and set before a job is submitted to the h/w, a non-zero value means - * it is valid */ - if (ktime_to_ns(atom->start_timestamp)) - start_timestamp = ktime_to_ns( - ktime_sub(ktime_get(), atom->start_timestamp)); - - kbasep_jd_debugfs_atom_deps(deps, atom); - - seq_printf(sfile, - "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", - i, atom->core_req, atom->status, - deps[0].type, deps[0].id, - deps[1].type, deps[1].id, - start_timestamp); - - - kbase_jd_debugfs_fence_info(atom, sfile); - - seq_puts(sfile, "\n"); - } - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); - mutex_unlock(&kctx->jctx.lock); - - return 0; -} - - -/** - * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file - * @in: &struct inode pointer - * @file: &struct file pointer - * - * Return: file descriptor - */ -static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) -{ - return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); -} - -static const struct file_operations kbasep_jd_debugfs_atoms_fops = { - .open = kbasep_jd_debugfs_atoms_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(kctx != NULL); - - /* Expose all atoms */ - debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, - &kbasep_jd_debugfs_atoms_fops); - -} - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd_debugfs.h deleted file mode 100755 index 697bdef4d434..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jd_debugfs.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file mali_kbase_jd_debugfs.h - * Header file for job dispatcher-related entries in debugfs - */ - -#ifndef _KBASE_JD_DEBUGFS_H -#define _KBASE_JD_DEBUGFS_H - -#include - -#define MALI_JD_DEBUGFS_VERSION 3 - -/* Forward declarations */ -struct kbase_context; - -/** - * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system - * - * @kctx Pointer to kbase_context - */ -void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); - -#endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jm.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_jm.c deleted file mode 100755 index da78a1670d9b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jm.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * HW access job manager common APIs - */ - -#include -#include "mali_kbase_hwaccess_jm.h" -#include "mali_kbase_jm.h" - -/** - * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot - * @js on the active context. - * @kbdev: Device pointer - * @js: Job slot to run on - * @nr_jobs_to_submit: Number of jobs to attempt to submit - * - * Return: true if slot can still be submitted on, false if slot is now full. 
- */ -static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, - int nr_jobs_to_submit) -{ - struct kbase_context *kctx; - int i; - - kctx = kbdev->hwaccess.active_kctx[js]; - - if (!kctx) - return true; - - for (i = 0; i < nr_jobs_to_submit; i++) { - struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); - - if (!katom) - return true; /* Context has no jobs on this slot */ - - kbase_backend_run_atom(kbdev, katom); - } - - return false; /* Slot ringbuffer should now be full */ -} - -u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) -{ - u32 ret_mask = 0; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - while (js_mask) { - int js = ffs(js_mask) - 1; - int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); - - if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) - ret_mask |= (1 << js); - - js_mask &= ~(1 << js); - } - - return ret_mask; -} - -void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!down_trylock(&js_devdata->schedule_sem)) { - kbase_jm_kick(kbdev, js_mask); - up(&js_devdata->schedule_sem); - } -} - -void kbase_jm_try_kick_all(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!down_trylock(&js_devdata->schedule_sem)) { - kbase_jm_kick_all(kbdev); - up(&js_devdata->schedule_sem); - } -} - -void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -{ - int js; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - if (kbdev->hwaccess.active_kctx[js] == kctx) - kbdev->hwaccess.active_kctx[js] = NULL; - } -} - -struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (katom->event_code != BASE_JD_EVENT_STOPPED && - katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { - return kbase_js_complete_atom(katom, NULL); - } else { - kbase_js_unpull(katom->kctx, katom); - return NULL; - } -} - -struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, ktime_t *end_timestamp) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - return kbase_js_complete_atom(katom, end_timestamp); -} - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jm.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_jm.h deleted file mode 100755 index c468ea4d20a5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_jm.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/* - * Job manager common APIs - */ - -#ifndef _KBASE_JM_H_ -#define _KBASE_JM_H_ - -/** - * kbase_jm_kick() - Indicate that there are jobs ready to run. - * @kbdev: Device pointer - * @js_mask: Mask of the job slots that can be pulled from. - * - * Caller must hold the hwaccess_lock and schedule_sem semaphore - * - * Return: Mask of the job slots that can still be submitted to. - */ -u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); - -/** - * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job - * slots. - * @kbdev: Device pointer - * - * Caller must hold the hwaccess_lock and schedule_sem semaphore - * - * Return: Mask of the job slots that can still be submitted to. - */ -static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) -{ - return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -} - -/** - * kbase_jm_try_kick - Attempt to call kbase_jm_kick - * @kbdev: Device pointer - * @js_mask: Mask of the job slots that can be pulled from - * Context: Caller must hold hwaccess_lock - * - * If schedule_sem can be immediately obtained then this function will call - * kbase_jm_kick() otherwise it will do nothing. - */ -void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); - -/** - * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all - * @kbdev: Device pointer - * Context: Caller must hold hwaccess_lock - * - * If schedule_sem can be immediately obtained then this function will call - * kbase_jm_kick_all() otherwise it will do nothing. - */ -void kbase_jm_try_kick_all(struct kbase_device *kbdev); - -/** - * kbase_jm_idle_ctx() - Mark a context as idle. - * @kbdev: Device pointer - * @kctx: Context to mark as idle - * - * No more atoms will be pulled from this context until it is marked as active - * by kbase_js_use_ctx(). - * - * The context should have no atoms currently pulled from it - * (kctx->atoms_pulled == 0). - * - * Caller must hold the hwaccess_lock - */ -void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has - * been soft-stopped or will fail due to a - * dependency - * @kbdev: Device pointer - * @katom: Atom that has been stopped or will be failed - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none - */ -struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * kbase_jm_complete() - Complete an atom - * @kbdev: Device pointer - * @katom: Atom that has completed - * @end_timestamp: Timestamp of atom completion - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none - */ -struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, ktime_t *end_timestamp); - -#endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_js.c deleted file mode 100755 index 80b6d77e2fb0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js.c +++ /dev/null @@ -1,2893 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* - * Job Scheduler Implementation - */ -#include -#include -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include -#endif -#include -#include -#include - -#include -#include - -#include "mali_kbase_jm.h" -#include "mali_kbase_hwaccess_jm.h" - -/* - * Private types - */ - -/* Bitpattern indicating the result of releasing a context */ -enum { - /* The context was descheduled - caller should try scheduling in a new - * one to keep the runpool full */ - KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), - /* Ctx attributes were changed - caller should try scheduling all - * contexts */ - KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) -}; - -typedef u32 kbasep_js_release_result; - -const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { - KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ - KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ - KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ -}; - -const base_jd_prio -kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { - BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ - BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ - BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ -}; - - -/* - * Private function prototypes - */ -static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( - struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state); - -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, - kbasep_js_ctx_job_cb callback); - -/* Helper for trace subcodes */ -#if KBASE_TRACE_ENABLE -static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - return atomic_read(&kctx->refcount); -} -#else /* KBASE_TRACE_ENABLE */ -static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - CSTD_UNUSED(kbdev); - CSTD_UNUSED(kctx); - return 0; -} -#endif /* KBASE_TRACE_ENABLE */ - -/* - * Private functions - */ - -/** - * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements - * @features: JSn_FEATURE register value - * - * Given a JSn_FEATURE register value returns the core requirements that match - * - * Return: Core requirement bit mask - */ -static base_jd_core_req core_reqs_from_jsn_features(u16 features) -{ - base_jd_core_req core_req = 0u; - - if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) - core_req |= BASE_JD_REQ_V; - - if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) - core_req |= BASE_JD_REQ_CF; - - if ((features & JS_FEATURE_COMPUTE_JOB) != 0) - core_req |= BASE_JD_REQ_CS; - - if ((features & JS_FEATURE_TILER_JOB) != 0) - core_req |= BASE_JD_REQ_T; - - if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) - core_req |= BASE_JD_REQ_FS; - - return core_req; -} - -static void kbase_js_sync_timers(struct kbase_device *kbdev) -{ - mutex_lock(&kbdev->js_data.runpool_mutex); - kbase_backend_ctx_count_changed(kbdev); - 
mutex_unlock(&kbdev->js_data.runpool_mutex); -} - -/* Hold the mmu_hw_mutex and hwaccess_lock for this */ -bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - bool result = false; - int as_nr; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - lockdep_assert_held(&kbdev->hwaccess_lock); - - as_nr = kctx->as_nr; - if (atomic_read(&kctx->refcount) > 0) { - KBASE_DEBUG_ASSERT(as_nr >= 0); - - kbase_ctx_sched_retain_ctx_refcount(kctx); - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, - NULL, 0u, atomic_read(&kctx->refcount)); - result = true; - } - - return result; -} - -/** - * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority to check. - * - * Return true if there are no atoms to pull. There may be running atoms in the - * ring buffer even if there are no atoms to pull. It is also possible for the - * ring buffer to be full (with running atoms) when this functions returns - * true. - * - * Return: true if there are no atoms to pull, false otherwise. - */ -static inline bool -jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - return RB_EMPTY_ROOT(&rb->runnable_tree); -} - -/** - * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no - * pullable atoms - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * - * Caller must hold hwaccess_lock - * - * Return: true if the ring buffers for all priorities have no pullable atoms, - * false otherwise. - */ -static inline bool -jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) -{ - int prio; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { - if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) - return false; - } - - return true; -} - -/** - * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. - * @kctx: Pointer to kbase context with the queue. - * @js: Job slot id to iterate. - * @prio: Priority id to iterate. - * @callback: Function pointer to callback. - * - * Iterate over a queue and invoke @callback for each entry in the queue, and - * remove the entry from the queue. - * - * If entries are added to the queue while this is running those entries may, or - * may not be covered. To ensure that all entries in the buffer have been - * enumerated when this function returns jsctx->lock must be held when calling - * this function. - * - * The HW access lock must always be held when calling this function. 
- */ -static void -jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, - kbasep_js_ctx_job_cb callback) -{ - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { - struct rb_node *node = rb_first(&queue->runnable_tree); - struct kbase_jd_atom *entry = rb_entry(node, - struct kbase_jd_atom, runnable_tree_node); - - rb_erase(node, &queue->runnable_tree); - callback(kctx->kbdev, entry); - } - - while (!list_empty(&queue->x_dep_head)) { - struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, - struct kbase_jd_atom, queue); - - list_del(queue->x_dep_head.next); - - callback(kctx->kbdev, entry); - } -} - -/** - * jsctx_queue_foreach(): - Execute callback for each entry in every queue - * @kctx: Pointer to kbase context with queue. - * @js: Job slot id to iterate. - * @callback: Function pointer to callback. - * - * Iterate over all the different priorities, and for each call - * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback - * for each entry, and remove the entry from the queue. - */ -static inline void -jsctx_queue_foreach(struct kbase_context *kctx, int js, - kbasep_js_ctx_job_cb callback) -{ - int prio; - - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_queue_foreach_prio(kctx, js, prio, callback); -} - -/** - * jsctx_rb_peek_prio(): - Check buffer and get next atom - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority id to check. - * - * Check the ring buffer for the specified @js and @prio and return a pointer to - * the next atom, unless the ring buffer is empty. - * - * Return: Pointer to next atom in buffer, or NULL if there is no atom. - */ -static inline struct kbase_jd_atom * -jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - struct rb_node *node; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - node = rb_first(&rb->runnable_tree); - if (!node) - return NULL; - - return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); -} - -/** - * jsctx_rb_peek(): - Check all priority buffers and get next atom - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * - * Check the ring buffers for all priorities, starting from - * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a - * pointer to the next atom, unless all the priority's ring buffers are empty. - * - * Caller must hold the hwaccess_lock. - * - * Return: Pointer to next atom in buffer, or NULL if there is no atom. - */ -static inline struct kbase_jd_atom * -jsctx_rb_peek(struct kbase_context *kctx, int js) -{ - int prio; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { - struct kbase_jd_atom *katom; - - katom = jsctx_rb_peek_prio(kctx, js, prio); - if (katom) - return katom; - } - - return NULL; -} - -/** - * jsctx_rb_pull(): - Mark atom in list as running - * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to pull. - * - * Mark an atom previously obtained from jsctx_rb_peek() as running. - * - * @katom must currently be at the head of the ring buffer. 
- */ -static inline void -jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - /* Atoms must be pulled in the correct order. */ - WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); - - rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); -} - -#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) - -static void -jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - while (*new) { - struct kbase_jd_atom *entry = container_of(*new, - struct kbase_jd_atom, runnable_tree_node); - - parent = *new; - if (LESS_THAN_WRAP(katom->age, entry->age)) - new = &((*new)->rb_left); - else - new = &((*new)->rb_right); - } - - /* Add new node and rebalance tree. */ - rb_link_node(&katom->runnable_tree_node, parent, new); - rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); - - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_READY); -} - -/** - * jsctx_rb_unpull(): - Undo marking of atom in list as running - * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to unpull. - * - * Undo jsctx_rb_pull() and put @katom back in the queue. - * - * jsctx_rb_unpull() must be called on atoms in the same order the atoms were - * pulled. - */ -static inline void -jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - jsctx_tree_add(kctx, katom); -} - -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, - int js, - bool is_scheduled); -static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); -static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); - -/* - * Functions private to KBase ('Protected' functions) - */ -int kbasep_js_devdata_init(struct kbase_device * const kbdev) -{ - struct kbasep_js_device_data *jsdd; - int i, j; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - jsdd = &kbdev->js_data; - -#ifdef CONFIG_MALI_DEBUG - /* Soft-stop will be disabled on a single context by default unless - * softstop_always is set */ - jsdd->softstop_always = false; -#endif /* CONFIG_MALI_DEBUG */ - jsdd->nr_all_contexts_running = 0; - jsdd->nr_user_contexts_running = 0; - jsdd->nr_contexts_pullable = 0; - atomic_set(&jsdd->nr_contexts_runnable, 0); - /* No ctx allowed to submit */ - jsdd->runpool_irq.submit_allowed = 0u; - memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, - sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); - memset(jsdd->runpool_irq.slot_affinities, 0, - sizeof(jsdd->runpool_irq.slot_affinities)); - memset(jsdd->runpool_irq.slot_affinity_refcount, 0, - sizeof(jsdd->runpool_irq.slot_affinity_refcount)); - INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); - - /* Config attributes */ - jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; - jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; - jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) - jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408; - else - 
jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; - jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; - jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) - jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408; - else - jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; - jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; - jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; - jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; - atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); - - dev_dbg(kbdev->dev, "JS Config Attribs: "); - dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", - jsdd->scheduling_period_ns); - dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", - jsdd->soft_stop_ticks); - dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", - jsdd->soft_stop_ticks_cl); - dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", - jsdd->hard_stop_ticks_ss); - dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", - jsdd->hard_stop_ticks_cl); - dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", - jsdd->hard_stop_ticks_dumping); - dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", - jsdd->gpu_reset_ticks_ss); - dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", - jsdd->gpu_reset_ticks_cl); - dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", - jsdd->gpu_reset_ticks_dumping); - dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", - jsdd->ctx_timeslice_ns); - dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", - atomic_read(&jsdd->soft_job_timeout_ms)); - - if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && - jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && - jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && - jsdd->hard_stop_ticks_dumping < - jsdd->gpu_reset_ticks_dumping)) { - dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); - return -EINVAL; - } - -#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS - dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", - jsdd->soft_stop_ticks, - jsdd->scheduling_period_ns); -#endif -#if KBASE_DISABLE_SCHEDULING_HARD_STOPS - dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. 
Other hard-stops may still occur.", - jsdd->hard_stop_ticks_ss, - jsdd->hard_stop_ticks_dumping, - jsdd->scheduling_period_ns); -#endif -#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS - dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); -#endif - - for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) - jsdd->js_reqs[i] = core_reqs_from_jsn_features( - kbdev->gpu_props.props.raw_props.js_features[i]); - - /* On error, we could continue on: providing none of the below resources - * rely on the ones above */ - - mutex_init(&jsdd->runpool_mutex); - mutex_init(&jsdd->queue_mutex); - spin_lock_init(&kbdev->hwaccess_lock); - sema_init(&jsdd->schedule_sem, 1); - - for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { - for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { - INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); - INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); - } - } - - return 0; -} - -void kbasep_js_devdata_halt(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -void kbasep_js_devdata_term(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata; - s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - js_devdata = &kbdev->js_data; - - /* The caller must de-register all contexts before calling this - */ - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); - KBASE_DEBUG_ASSERT(memcmp( - js_devdata->runpool_irq.ctx_attr_ref_count, - zero_ctx_attr_ref_count, - sizeof(zero_ctx_attr_ref_count)) == 0); - CSTD_UNUSED(zero_ctx_attr_ref_count); -} - -int kbasep_js_kctx_init(struct kbase_context *const kctx) -{ - struct kbase_device *kbdev; - struct kbasep_js_kctx_info *js_kctx_info; - int i, j; - - KBASE_DEBUG_ASSERT(kctx != NULL); - - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); - - for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) - INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); - - js_kctx_info = &kctx->jctx.sched_info; - - js_kctx_info->ctx.nr_jobs = 0; - kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); - kbase_ctx_flag_clear(kctx, KCTX_DYING); - memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, - sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); - - /* Initially, the context is disabled from submission until the create - * flags are set */ - kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); - - /* On error, we could continue on: providing none of the below resources - * rely on the ones above */ - mutex_init(&js_kctx_info->ctx.jsctx_mutex); - - init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); - - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { - for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { - INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); - kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; - } - } - - return 0; -} - -void kbasep_js_kctx_term(struct kbase_context *kctx) -{ - struct kbase_device *kbdev; - struct kbasep_js_kctx_info *js_kctx_info; - int js; - bool update_ctx_count = false; - unsigned long flags; - - KBASE_DEBUG_ASSERT(kctx != NULL); - - kbdev = kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev != NULL); - - js_kctx_info = &kctx->jctx.sched_info; - - /* The caller must de-register all jobs before calling this */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); - - mutex_lock(&kbdev->js_data.queue_mutex); - mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - for (js = 0; js < 
kbdev->gpu_props.num_job_slots; js++) - list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { - WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); - atomic_dec(&kbdev->js_data.nr_contexts_runnable); - update_ctx_count = true; - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); - } - - mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - mutex_unlock(&kbdev->js_data.queue_mutex); - - if (update_ctx_count) { - mutex_lock(&kbdev->js_data.runpool_mutex); - kbase_backend_ctx_count_changed(kbdev); - mutex_unlock(&kbdev->js_data.runpool_mutex); - } -} - -/** - * kbase_js_ctx_list_add_pullable_nolock - Variant of - * kbase_jd_ctx_list_add_pullable() - * where the caller must hold - * hwaccess_lock - * @kbdev: Device pointer - * @kctx: Context to add to queue - * @js: Job slot to use - * - * Caller must hold hwaccess_lock - * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - bool ret = false; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) - list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - - list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); - - if (!kctx->slots_pullable) { - kbdev->js_data.nr_contexts_pullable++; - ret = true; - if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); - atomic_inc(&kbdev->js_data.nr_contexts_runnable); - } - } - kctx->slots_pullable |= (1 << js); - - return ret; -} - -/** - * kbase_js_ctx_list_add_pullable_head_nolock - Variant of - * kbase_js_ctx_list_add_pullable_head() - * where the caller must hold - * hwaccess_lock - * @kbdev: Device pointer - * @kctx: Context to add to queue - * @js: Job slot to use - * - * Caller must hold hwaccess_lock - * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool kbase_js_ctx_list_add_pullable_head_nolock( - struct kbase_device *kbdev, struct kbase_context *kctx, int js) -{ - bool ret = false; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) - list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - - list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); - - if (!kctx->slots_pullable) { - kbdev->js_data.nr_contexts_pullable++; - ret = true; - if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); - atomic_inc(&kbdev->js_data.nr_contexts_runnable); - } - } - kctx->slots_pullable |= (1 << js); - - return ret; -} - -/** - * kbase_js_ctx_list_add_pullable_head - Add context to the head of the - * per-slot pullable context queue - * @kbdev: Device pointer - * @kctx: Context to add to queue - * @js: Job slot to use - * - * If the context is on either the pullable or unpullable queues, then it is - * removed before being added to the head. - * - * This function should be used when a context has been scheduled, but no jobs - * can currently be pulled from it. 
- * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -/** - * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the - * per-slot unpullable context queue - * @kbdev: Device pointer - * @kctx: Context to add to queue - * @js: Job slot to use - * - * The context must already be on the per-slot pullable queue. It will be - * removed from the pullable queue before being added to the unpullable queue. - * - * This function should be used when a context has been pulled from, and there - * are no jobs remaining on the specified slot. - * - * Caller must hold hwaccess_lock - * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - bool ret = false; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); - - if (kctx->slots_pullable == (1 << js)) { - kbdev->js_data.nr_contexts_pullable--; - ret = true; - if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); - atomic_dec(&kbdev->js_data.nr_contexts_runnable); - } - } - kctx->slots_pullable &= ~(1 << js); - - return ret; -} - -/** - * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable - * or unpullable context queues - * @kbdev: Device pointer - * @kctx: Context to remove from queue - * @js: Job slot to use - * - * The context must already be on one of the queues. - * - * This function should be used when a context has no jobs on the GPU, and no - * jobs remaining for the specified slot. - * - * Caller must hold hwaccess_lock - * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - bool ret = false; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); - - list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - - if (kctx->slots_pullable == (1 << js)) { - kbdev->js_data.nr_contexts_pullable--; - ret = true; - if (!atomic_read(&kctx->atoms_pulled)) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); - atomic_dec(&kbdev->js_data.nr_contexts_runnable); - } - } - kctx->slots_pullable &= ~(1 << js); - - return ret; -} - -/** - * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() - * where the caller must hold - * hwaccess_lock - * @kbdev: Device pointer - * @js: Job slot to use - * - * Caller must hold hwaccess_lock - * - * Return: Context to use for specified slot. 
- * NULL if no contexts present for specified slot - */ -static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( - struct kbase_device *kbdev, - int js) -{ - struct kbase_context *kctx; - int i; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { - if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) - continue; - - kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, - struct kbase_context, - jctx.sched_info.ctx.ctx_list_entry[js]); - - list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - - return kctx; - } - return NULL; -} - -/** - * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable - * queue. - * @kbdev: Device pointer - * @js: Job slot to use - * - * Return: Context to use for specified slot. - * NULL if no contexts present for specified slot - */ -static struct kbase_context *kbase_js_ctx_list_pop_head( - struct kbase_device *kbdev, int js) -{ - struct kbase_context *kctx; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return kctx; -} - -/** - * kbase_js_ctx_pullable - Return if a context can be pulled from on the - * specified slot - * @kctx: Context pointer - * @js: Job slot to use - * @is_scheduled: true if the context is currently scheduled - * - * Caller must hold hwaccess_lock - * - * Return: true if context can be pulled from on specified slot - * false otherwise - */ -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, - bool is_scheduled) -{ - struct kbasep_js_device_data *js_devdata; - struct kbase_jd_atom *katom; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - js_devdata = &kctx->kbdev->js_data; - - if (is_scheduled) { - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) - return false; - } - katom = jsctx_rb_peek(kctx, js); - if (!katom) - return false; /* No pullable atoms */ - if (kctx->blocked_js[js][katom->sched_priority]) - return false; - if (atomic_read(&katom->blocked)) - return false; /* next atom blocked */ - if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { - if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) - return false; - if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) - return false; - } - - return true; -} - -static bool kbase_js_dep_validate(struct kbase_context *kctx, - struct kbase_jd_atom *katom) -{ - struct kbase_device *kbdev = kctx->kbdev; - bool ret = true; - bool has_dep = false, has_x_dep = false; - int js = kbase_js_get_slot(kbdev, katom); - int prio = katom->sched_priority; - int i; - - for (i = 0; i < 2; i++) { - struct kbase_jd_atom *dep_atom = katom->dep[i].atom; - - if (dep_atom) { - int dep_js = kbase_js_get_slot(kbdev, dep_atom); - int dep_prio = dep_atom->sched_priority; - - /* Dependent atom must already have been submitted */ - if (!(dep_atom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { - ret = false; - break; - } - - /* Dependencies with different priorities can't - be represented in the ringbuffer */ - if (prio != dep_prio) { - ret = false; - break; - } - - if (js == dep_js) { - /* Only one same-slot dependency can be - * represented in the ringbuffer */ - if (has_dep) { - ret = false; - break; - } - /* Each dependee atom can only have one - * same-slot dependency */ - if (dep_atom->post_dep) { - ret = 
false; - break; - } - has_dep = true; - } else { - /* Only one cross-slot dependency can be - * represented in the ringbuffer */ - if (has_x_dep) { - ret = false; - break; - } - /* Each dependee atom can only have one - * cross-slot dependency */ - if (dep_atom->x_post_dep) { - ret = false; - break; - } - /* The dependee atom can not already be in the - * HW access ringbuffer */ - if (dep_atom->gpu_rb_state != - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - ret = false; - break; - } - /* The dependee atom can not already have - * completed */ - if (dep_atom->status != - KBASE_JD_ATOM_STATE_IN_JS) { - ret = false; - break; - } - /* Cross-slot dependencies must not violate - * PRLAM-8987 affinity restrictions */ - if (kbase_hw_has_issue(kbdev, - BASE_HW_ISSUE_8987) && - (js == 2 || dep_js == 2)) { - ret = false; - break; - } - has_x_dep = true; - } - - /* Dependency can be represented in ringbuffers */ - } - } - - /* If dependencies can be represented by ringbuffer then clear them from - * atom structure */ - if (ret) { - for (i = 0; i < 2; i++) { - struct kbase_jd_atom *dep_atom = katom->dep[i].atom; - - if (dep_atom) { - int dep_js = kbase_js_get_slot(kbdev, dep_atom); - - if ((js != dep_js) && - (dep_atom->status != - KBASE_JD_ATOM_STATE_COMPLETED) - && (dep_atom->status != - KBASE_JD_ATOM_STATE_HW_COMPLETED) - && (dep_atom->status != - KBASE_JD_ATOM_STATE_UNUSED)) { - - katom->atom_flags |= - KBASE_KATOM_FLAG_X_DEP_BLOCKED; - katom->x_pre_dep = dep_atom; - dep_atom->x_post_dep = katom; - if (kbase_jd_katom_dep_type( - &katom->dep[i]) == - BASE_JD_DEP_TYPE_DATA) - katom->atom_flags |= - KBASE_KATOM_FLAG_FAIL_BLOCKER; - } - if ((kbase_jd_katom_dep_type(&katom->dep[i]) - == BASE_JD_DEP_TYPE_DATA) && - (js == dep_js)) { - katom->pre_dep = dep_atom; - dep_atom->post_dep = katom; - } - - list_del(&katom->dep_item[i]); - kbase_jd_katom_dep_clear(&katom->dep[i]); - } - } - } - - return ret; -} - -void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) -{ - struct kbase_device *kbdev = kctx->kbdev; - int js; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - /* Move kctx to the pullable/upullable list as per the new priority */ - if (new_priority != kctx->priority) { - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - if (kctx->slots_pullable & (1 << js)) - list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_pullable[js][new_priority]); - else - list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], - &kbdev->js_data.ctx_list_unpullable[js][new_priority]); - } - - kctx->priority = new_priority; - } -} - -void kbase_js_update_ctx_priority(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW; - int prio; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) { - /* Determine the new priority for context, as per the priority - * of currently in-use atoms. 
- */ - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { - if (kctx->atoms_count[prio]) { - new_priority = prio; - break; - } - } - } - - kbase_js_set_ctx_priority(kctx, new_priority); -} - -bool kbasep_js_add_job(struct kbase_context *kctx, - struct kbase_jd_atom *atom) -{ - unsigned long flags; - struct kbasep_js_kctx_info *js_kctx_info; - struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; - - bool enqueue_required = false; - bool timer_sync = false; - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(atom != NULL); - lockdep_assert_held(&kctx->jctx.lock); - - kbdev = kctx->kbdev; - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - - /* - * Begin Runpool transaction - */ - mutex_lock(&js_devdata->runpool_mutex); - - /* Refcount ctx.nr_jobs */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); - ++(js_kctx_info->ctx.nr_jobs); - - /* Lock for state available during IRQ */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (++kctx->atoms_count[atom->sched_priority] == 1) - kbase_js_update_ctx_priority(kctx); - - if (!kbase_js_dep_validate(kctx, atom)) { - /* Dependencies could not be represented */ - --(js_kctx_info->ctx.nr_jobs); - - /* Setting atom status back to queued as it still has unresolved - * dependencies */ - atom->status = KBASE_JD_ATOM_STATE_QUEUED; - - /* Undo the count, as the atom will get added again later but - * leave the context priority adjusted or boosted, in case if - * this was the first higher priority atom received for this - * context. - * This will prevent the scenario of priority inversion, where - * another context having medium priority atoms keeps getting - * scheduled over this context, which is having both lower and - * higher priority atoms, but higher priority atoms are blocked - * due to dependency on lower priority atoms. With priority - * boost the high priority atom will get to run at earliest. - */ - kctx->atoms_count[atom->sched_priority]--; - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&js_devdata->runpool_mutex); - - goto out_unlock; - } - - enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); - - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, - kbasep_js_trace_get_refcnt(kbdev, kctx)); - - /* Context Attribute Refcounting */ - kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); - - if (enqueue_required) { - if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) - timer_sync = kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, atom->slot_nr); - else - timer_sync = kbase_js_ctx_list_add_unpullable_nolock( - kbdev, kctx, atom->slot_nr); - } - /* If this context is active and the atom is the first on its slot, - * kick the job manager to attempt to fast-start the atom */ - if (enqueue_required && kctx == - kbdev->hwaccess.active_kctx[atom->slot_nr]) - kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (timer_sync) - kbase_backend_ctx_count_changed(kbdev); - mutex_unlock(&js_devdata->runpool_mutex); - /* End runpool transaction */ - - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { - if (kbase_ctx_flag(kctx, KCTX_DYING)) { - /* A job got added while/after kbase_job_zap_context() - * was called on a non-scheduled context. Kill that job - * by killing the context. 
*/ - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, - false); - } else if (js_kctx_info->ctx.nr_jobs == 1) { - /* Handle Refcount going from 0 to 1: schedule the - * context on the Queue */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); - - /* Queue was updated - caller must try to - * schedule the head context */ - WARN_ON(!enqueue_required); - } - } -out_unlock: - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - mutex_unlock(&js_devdata->queue_mutex); - - return enqueue_required; -} - -void kbasep_js_remove_job(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_jd_atom *atom) -{ - struct kbasep_js_kctx_info *js_kctx_info; - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(atom != NULL); - - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, - kbasep_js_trace_get_refcnt(kbdev, kctx)); - - /* De-refcount ctx.nr_jobs */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); - --(js_kctx_info->ctx.nr_jobs); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (--kctx->atoms_count[atom->sched_priority] == 0) - kbase_js_update_ctx_priority(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - unsigned long flags; - struct kbasep_js_atom_retained_state katom_retained_state; - bool attr_state_changed; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(katom != NULL); - - kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); - kbasep_js_remove_job(kbdev, kctx, katom); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* The atom has 'finished' (will not be re-run), so no need to call - * kbasep_js_has_atom_finished(). 
- * - * This is because it returns false for soft-stopped atoms, but we - * want to override that, because we're cancelling an atom regardless of - * whether it was soft-stopped or not */ - attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, - &katom_retained_state); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return attr_state_changed; -} - -bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - unsigned long flags; - bool result; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - return result; -} - -struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, - int as_nr) -{ - unsigned long flags; - struct kbase_context *found_kctx = NULL; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - found_kctx = kbdev->as_to_kctx[as_nr]; - - if (found_kctx != NULL) - kbase_ctx_sched_retain_ctx_refcount(found_kctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return found_kctx; -} - -/** - * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after - * releasing a context and/or atom - * @kbdev: The kbase_device to operate on - * @kctx: The kbase_context to operate on - * @katom_retained_state: Retained state from the atom - * @runpool_ctx_attr_change: True if the runpool context attributes have changed - * - * This collates a set of actions that must happen whilst hwaccess_lock is held. - * - * This includes running more jobs when: - * - The previously released kctx caused a ctx attribute change, - * - The released atom caused a ctx attribute change, - * - Slots were previously blocked due to affinity restrictions, - * - Submission during IRQ handling failed. - * - * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were - * changed. 
The caller should try scheduling all contexts - */ -static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( - struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state, - bool runpool_ctx_attr_change) -{ - struct kbasep_js_device_data *js_devdata; - kbasep_js_release_result result = 0; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(katom_retained_state != NULL); - js_devdata = &kbdev->js_data; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { - /* A change in runpool ctx attributes might mean we can - * run more jobs than before */ - result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; - - KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, - kctx, NULL, 0u, 0); - } - return result; -} - -/** - * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference - * on a ctx and an atom's "retained state", only - * taking the runpool and as transaction mutexes - * @kbdev: The kbase_device to operate on - * @kctx: The kbase_context to operate on - * @katom_retained_state: Retained state from the atom - * - * This also starts more jobs running in the case of an ctx-attribute state change - * - * This does none of the followup actions for scheduling: - * - It does not schedule in a new context - * - It does not requeue or handle dying contexts - * - * For those tasks, just call kbasep_js_runpool_release_ctx() instead - * - * Has following requirements - * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr - * - Context has a non-zero refcount - * - Caller holds js_kctx_info->ctx.jsctx_mutex - * - Caller holds js_devdata->runpool_mutex - * - * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating - * the result of releasing a context that whether the caller should try - * scheduling a new context or should try scheduling all contexts. - */ -static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( - struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state) -{ - unsigned long flags; - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - kbasep_js_release_result release_result = 0u; - bool runpool_ctx_attr_change = false; - int kctx_as_nr; - int new_ref_count; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - js_kctx_info = &kctx->jctx.sched_info; - js_devdata = &kbdev->js_data; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - kctx_as_nr = kctx->as_nr; - KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); - - /* - * Transaction begins on AS and runpool_irq - * - * Assert about out calling contract - */ - mutex_lock(&kbdev->pm.lock); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); - KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); - - /* Update refcount */ - kbase_ctx_sched_release_ctx(kctx); - new_ref_count = atomic_read(&kctx->refcount); - - /* Release the atom if it finished (i.e. 
wasn't soft-stopped) */ - if (kbasep_js_has_atom_finished(katom_retained_state)) - runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( - kbdev, kctx, katom_retained_state); - - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, - new_ref_count); - - if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && - !kbase_pm_is_suspending(kbdev)) { - /* Context is kept scheduled into an address space even when - * there are no jobs; in this case we have to handle the - * situation where all jobs have been evicted from the GPU and - * submission is disabled. - * - * At this point we re-enable submission to allow further jobs - * to be executed - */ - kbasep_js_set_submit_allowed(js_devdata, kctx); - } - - /* Make a set of checks to see if the context should be scheduled out. - * Note that there'll always be at least 1 reference to the context - * which was previously acquired by kbasep_js_schedule_ctx(). */ - if (new_ref_count == 1 && - (!kbasep_js_is_submit_allowed(js_devdata, kctx) || - kbdev->pm.suspending)) { - int num_slots = kbdev->gpu_props.num_job_slots; - int slot; - - /* Last reference, and we've been told to remove this context - * from the Run Pool */ - dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", - kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, - kbasep_js_is_submit_allowed(js_devdata, kctx)); - -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_mmu_as_released(kctx->as_nr); -#endif - KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); - - kbase_backend_release_ctx_irq(kbdev, kctx); - - for (slot = 0; slot < num_slots; slot++) { - if (kbdev->hwaccess.active_kctx[slot] == kctx) - kbdev->hwaccess.active_kctx[slot] = NULL; - } - - /* Ctx Attribute handling - * - * Releasing atom attributes must either happen before this, or - * after the KCTX_SCHEDULED flag is changed, otherwise we - * double-decrement the attributes - */ - runpool_ctx_attr_change |= - kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); - - /* Releasing the context and katom retained state can allow - * more jobs to run */ - release_result |= - kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, - kctx, katom_retained_state, - runpool_ctx_attr_change); - - /* - * Transaction ends on AS and runpool_irq: - * - * By this point, the AS-related data is now clear and ready - * for re-use. - * - * Since releases only occur once for each previous successful - * retain, and no more retains are allowed on this context, no - * other thread will be operating in this - * code whilst we are - */ - - /* Recalculate pullable status for all slots */ - for (slot = 0; slot < num_slots; slot++) { - if (kbase_js_ctx_pullable(kctx, slot, false)) - kbase_js_ctx_list_add_pullable_nolock(kbdev, - kctx, slot); - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - kbase_backend_release_ctx_noirq(kbdev, kctx); - - mutex_unlock(&kbdev->pm.lock); - - /* Note: Don't reuse kctx_as_nr now */ - - /* Synchronize with any timers */ - kbase_backend_ctx_count_changed(kbdev); - - /* update book-keeping info */ - kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); - /* Signal any waiter that the context is not scheduled, so is - * safe for termination - once the jsctx_mutex is also dropped, - * and jobs have finished. 
*/ - wake_up(&js_kctx_info->ctx.is_scheduled_wait); - - /* Queue an action to occur after we've dropped the lock */ - release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | - KBASEP_JS_RELEASE_RESULT_SCHED_ALL; - } else { - kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, - katom_retained_state, runpool_ctx_attr_change); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->pm.lock); - } - - return release_result; -} - -void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_atom_retained_state katom_retained_state; - - /* Setup a dummy katom_retained_state */ - kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); - - kbasep_js_runpool_release_ctx_internal(kbdev, kctx, - &katom_retained_state); -} - -void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, bool has_pm_ref) -{ - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - /* This is called if and only if you've you've detached the context from - * the Runpool Queue, and not added it back to the Runpool - */ - KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - if (kbase_ctx_flag(kctx, KCTX_DYING)) { - /* Dying: don't requeue, but kill all jobs on the context. This - * happens asynchronously */ - dev_dbg(kbdev->dev, - "JS: ** Killing Context %p on RunPool Remove **", kctx); - kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); - } -} - -void kbasep_js_runpool_release_ctx_and_katom_retained_state( - struct kbase_device *kbdev, struct kbase_context *kctx, - struct kbasep_js_atom_retained_state *katom_retained_state) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - kbasep_js_release_result release_result; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - js_kctx_info = &kctx->jctx.sched_info; - js_devdata = &kbdev->js_data; - - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - - release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, - katom_retained_state); - - /* Drop the runpool mutex to allow requeing kctx */ - mutex_unlock(&js_devdata->runpool_mutex); - - if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); - - /* Drop the jsctx_mutex to allow scheduling in a new context */ - - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - - if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) - kbase_js_sched_all(kbdev); -} - -void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_atom_retained_state katom_retained_state; - - kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); - - kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, - &katom_retained_state); -} - -/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into - * kbase_js_sched_all() */ -static void kbasep_js_runpool_release_ctx_no_schedule( - struct kbase_device *kbdev, struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - kbasep_js_release_result release_result; - struct kbasep_js_atom_retained_state katom_retained_state_struct; - struct kbasep_js_atom_retained_state *katom_retained_state = - &katom_retained_state_struct; - - 
KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - js_kctx_info = &kctx->jctx.sched_info; - js_devdata = &kbdev->js_data; - kbasep_js_atom_retained_state_init_invalid(katom_retained_state); - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - - release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, - katom_retained_state); - - /* Drop the runpool mutex to allow requeing kctx */ - mutex_unlock(&js_devdata->runpool_mutex); - if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); - - /* Drop the jsctx_mutex to allow scheduling in a new context */ - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - /* NOTE: could return release_result if the caller would like to know - * whether it should schedule a new context, but currently no callers do - */ -} - -void kbase_js_set_timeouts(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbase_backend_timeouts_changed(kbdev); -} - -static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - unsigned long flags; - bool kctx_suspended = false; - int as_nr; - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - /* Pick available address space for this context */ - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - as_nr = kbase_ctx_sched_retain_ctx(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - if (as_nr == KBASEP_AS_NR_INVALID) { - as_nr = kbase_backend_find_and_release_free_address_space( - kbdev, kctx); - if (as_nr != KBASEP_AS_NR_INVALID) { - /* Attempt to retain the context again, this should - * succeed */ - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - as_nr = kbase_ctx_sched_retain_ctx(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - WARN_ON(as_nr == KBASEP_AS_NR_INVALID); - } - } - if (as_nr == KBASEP_AS_NR_INVALID) - return false; /* No address spaces currently available */ - - /* - * Atomic transaction on the Context and Run Pool begins - */ - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Check to see if context is dying due to kbase_job_zap_context() */ - if (kbase_ctx_flag(kctx, KCTX_DYING)) { - /* Roll back the transaction so far and return */ - kbase_ctx_sched_release_ctx(kctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - return false; - } - - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, - 0u, - kbasep_js_trace_get_refcnt(kbdev, kctx)); - - kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); - - /* Assign context to previously chosen address space */ - if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { - /* Roll back the transaction so far and return */ - kbase_ctx_sched_release_ctx(kctx); - kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - return 
false; - } - - kbdev->hwaccess.active_kctx[js] = kctx; - -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_mmu_as_in_use(kctx->as_nr); -#endif - KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx); - - /* Cause any future waiter-on-termination to wait until the context is - * descheduled */ - wake_up(&js_kctx_info->ctx.is_scheduled_wait); - - /* Re-check for suspending: a suspend could've occurred, and all the - * contexts could've been removed from the runpool before we took this - * lock. In this case, we don't want to allow this context to run jobs, - * we just want it out immediately. - * - * The DMB required to read the suspend flag was issued recently as part - * of the hwaccess_lock locking. If a suspend occurs *after* that lock - * was taken (i.e. this condition doesn't execute), then the - * kbasep_js_suspend() code will cleanup this context instead (by virtue - * of it being called strictly after the suspend flag is set, and will - * wait for this lock to drop) */ - if (kbase_pm_is_suspending(kbdev)) { - /* Cause it to leave at some later point */ - bool retained; - - retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - KBASE_DEBUG_ASSERT(retained); - - kbasep_js_clear_submit_allowed(js_devdata, kctx); - kctx_suspended = true; - } - - kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js); - - /* Transaction complete */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kbdev->mmu_hw_mutex); - - /* Synchronize with any timers */ - kbase_backend_ctx_count_changed(kbdev); - - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - /* Note: after this point, the context could potentially get scheduled - * out immediately */ - - if (kctx_suspended) { - /* Finishing forcing out the context due to a suspend. Use a - * variant of kbasep_js_runpool_release_ctx() that doesn't - * schedule a new context, to prevent a risk of recursion back - * into this function */ - kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); - return false; - } - return true; -} - -static bool kbase_js_use_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_backend_use_ctx_sched(kbdev, kctx, js)) { - /* Context already has ASID - mark as active */ - if (kbdev->hwaccess.active_kctx[js] != kctx) { - kbdev->hwaccess.active_kctx[js] = kctx; - kbase_ctx_flag_clear(kctx, - KCTX_PULLED_SINCE_ACTIVE_JS0 << js); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return true; /* Context already scheduled */ - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - return kbasep_js_schedule_ctx(kbdev, kctx, js); -} - -void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_kctx_info *js_kctx_info; - struct kbasep_js_device_data *js_devdata; - bool is_scheduled; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - /* This must never be attempted whilst suspending - i.e. 
it should only - * happen in response to a syscall from a user-space thread */ - BUG_ON(kbase_pm_is_suspending(kbdev)); - - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - - /* Mark the context as privileged */ - kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); - - is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); - if (!is_scheduled) { - /* Add the context to the pullable list */ - if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) - kbase_js_sync_timers(kbdev); - - /* Fast-starting requires the jsctx_mutex to be dropped, - * because it works on multiple ctxs */ - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - - /* Try to schedule the context in */ - kbase_js_sched_all(kbdev); - - /* Wait for the context to be scheduled in */ - wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - } else { - /* Already scheduled in - We need to retain it to keep the - * corresponding address space */ - kbasep_js_runpool_retain_ctx(kbdev, kctx); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - } -} -KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); - -void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kctx != NULL); - js_kctx_info = &kctx->jctx.sched_info; - - /* We don't need to use the address space anymore */ - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - /* Release the context - it will be scheduled out */ - kbasep_js_runpool_release_ctx(kbdev, kctx); - - kbase_js_sched_all(kbdev); -} -KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); - -void kbasep_js_suspend(struct kbase_device *kbdev) -{ - unsigned long flags; - struct kbasep_js_device_data *js_devdata; - int i; - u16 retained = 0u; - int nr_privileged_ctx = 0; - - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); - js_devdata = &kbdev->js_data; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Prevent all contexts from submitting */ - js_devdata->runpool_irq.submit_allowed = 0; - - /* Retain each of the contexts, so we can cause it to leave even if it - * had no refcount to begin with */ - for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { - struct kbase_context *kctx = kbdev->as_to_kctx[i]; - - retained = retained << 1; - - if (kctx && !(kbdev->as_free & (1u << i))) { - kbase_ctx_sched_retain_ctx_refcount(kctx); - retained |= 1u; - /* We can only cope with up to 1 privileged context - - * the instrumented context. It'll be suspended by - * disabling instrumentation */ - if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { - ++nr_privileged_ctx; - WARN_ON(nr_privileged_ctx != 1); - } - } - } - CSTD_UNUSED(nr_privileged_ctx); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* De-ref the previous retain to ensure each context gets pulled out - * sometime later. 
*/ - for (i = 0; - i < BASE_MAX_NR_AS; - ++i, retained = retained >> 1) { - struct kbase_context *kctx = kbdev->as_to_kctx[i]; - - if (retained & 1u) - kbasep_js_runpool_release_ctx(kbdev, kctx); - } - - /* Caller must wait for all Power Manager active references to be - * dropped */ -} - -void kbasep_js_resume(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata; - int js, prio; - - KBASE_DEBUG_ASSERT(kbdev); - js_devdata = &kbdev->js_data; - KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); - - mutex_lock(&js_devdata->queue_mutex); - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { - struct kbase_context *kctx, *n; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - list_for_each_entry_safe(kctx, n, - &kbdev->js_data.ctx_list_unpullable[js][prio], - jctx.sched_info.ctx.ctx_list_entry[js]) { - struct kbasep_js_kctx_info *js_kctx_info; - bool timer_sync = false; - - /* Drop lock so we can take kctx mutexes */ - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - - js_kctx_info = &kctx->jctx.sched_info; - - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && - kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (timer_sync) - kbase_backend_ctx_count_changed(kbdev); - mutex_unlock(&js_devdata->runpool_mutex); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - /* Take lock before accessing list again */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - } - } - mutex_unlock(&js_devdata->queue_mutex); - - /* Restart atom processing */ - kbase_js_sched_all(kbdev); - - /* JS Resume complete */ -} - -bool kbase_js_is_atom_valid(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - if ((katom->core_req & BASE_JD_REQ_FS) && - (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | - BASE_JD_REQ_T))) - return false; - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) && - (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) && - (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T))) - return false; - - return true; -} - -static int kbase_js_get_slot(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) -{ - if (katom->core_req & BASE_JD_REQ_FS) - return 0; - - if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { - if (katom->device_nr == 1 && - kbdev->gpu_props.num_core_groups == 2) - return 2; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) - return 2; - } - - return 1; -} - -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom) -{ - bool enqueue_required; - - katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - lockdep_assert_held(&kctx->jctx.lock); - - /* If slot will transition from unpullable to pullable then add to - * pullable list */ - if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { - enqueue_required = true; - } else { - enqueue_required = false; - } - if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || - (katom->pre_dep && (katom->pre_dep->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - - 
list_add_tail(&katom->queue, &queue->x_dep_head); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - enqueue_required = false; - } else { - /* Check if there are lower priority jobs to soft stop */ - kbase_job_slot_ctx_priority_check_locked(kctx, katom); - - /* Add atom to ring buffer. */ - jsctx_tree_add(kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; - } - - return enqueue_required; -} - -/** - * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the - * runnable_tree, ready for execution - * @katom: Atom to submit - * - * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, - * but is still present in the x_dep list. If @katom has a same-slot dependent - * atom then that atom (and any dependents) will also be moved. - */ -static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&katom->kctx->kbdev->hwaccess_lock); - - while (katom) { - WARN_ON(!(katom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); - - if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - list_del(&katom->queue); - katom->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - jsctx_tree_add(katom->kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; - } else { - break; - } - - katom = katom->post_dep; - } -} - - -/** - * kbase_js_evict_deps - Evict dependencies of a failed atom. - * @kctx: Context pointer - * @katom: Pointer to the atom that has failed. - * @js: The job slot the katom was run on. - * @prio: Priority of the katom. - * - * Remove all post dependencies of an atom from the context ringbuffers. - * - * The original atom's event_code will be propagated to all dependent atoms. - * - * Context: Caller must hold the HW access lock - */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, int prio) -{ - struct kbase_jd_atom *x_dep = katom->x_post_dep; - struct kbase_jd_atom *next_katom = katom->post_dep; - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - if (next_katom) { - KBASE_DEBUG_ASSERT(next_katom->status != - KBASE_JD_ATOM_STATE_HW_COMPLETED); - next_katom->will_fail_event_code = katom->event_code; - - } - - /* Has a cross-slot dependency. */ - if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { - /* Remove dependency. */ - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - - /* Fail if it had a data dependency. 
*/ - if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { - x_dep->will_fail_event_code = katom->event_code; - } - if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) - kbase_js_move_to_tree(x_dep); - } -} - -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) -{ - struct kbase_jd_atom *katom; - struct kbasep_js_device_data *js_devdata; - struct kbase_device *kbdev; - int pulled; - - KBASE_DEBUG_ASSERT(kctx); - - kbdev = kctx->kbdev; - - js_devdata = &kbdev->js_data; - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) - return NULL; - if (kbase_pm_is_suspending(kbdev)) - return NULL; - - katom = jsctx_rb_peek(kctx, js); - if (!katom) - return NULL; - if (kctx->blocked_js[js][katom->sched_priority]) - return NULL; - if (atomic_read(&katom->blocked)) - return NULL; - - /* Due to ordering restrictions when unpulling atoms on failure, we do - * not allow multiple runs of fail-dep atoms from the same context to be - * present on the same slot */ - if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { - struct kbase_jd_atom *prev_atom = - kbase_backend_inspect_tail(kbdev, js); - - if (prev_atom && prev_atom->kctx != kctx) - return NULL; - } - - if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { - if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) - return NULL; - if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && - kbase_backend_nr_atoms_on_slot(kbdev, js)) - return NULL; - } - - kbase_ctx_flag_set(kctx, KCTX_PULLED); - kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js)); - - pulled = atomic_inc_return(&kctx->atoms_pulled); - if (pulled == 1 && !kctx->slots_pullable) { - WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); - atomic_inc(&kbdev->js_data.nr_contexts_runnable); - } - atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); - kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++; - jsctx_rb_pull(kctx, katom); - - kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - - katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; - - katom->ticks = 0; - - return katom; -} - - -static void js_return_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); - struct kbase_context *kctx = katom->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; - struct kbasep_js_atom_retained_state retained_state; - int js = katom->slot_nr; - int prio = katom->sched_priority; - bool timer_sync = false; - bool context_idle = false; - unsigned long flags; - base_jd_core_req core_req = katom->core_req; - - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom); - - kbase_backend_complete_wq(kbdev, katom); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) - kbase_as_poking_timer_release_atom(kbdev, kctx, katom); - - kbasep_js_atom_retained_state_copy(&retained_state, katom); - - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - - atomic_dec(&kctx->atoms_pulled); - atomic_dec(&kctx->atoms_pulled_slot[js]); - - atomic_dec(&katom->blocked); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--; - - if (!atomic_read(&kctx->atoms_pulled_slot[js]) && - jsctx_rb_none_to_pull(kctx, js)) - timer_sync |= 
kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); - - /* If this slot has been blocked due to soft-stopped atoms, and all - * atoms have now been processed, then unblock the slot */ - if (!kctx->atoms_pulled_slot_pri[js][prio] && - kctx->blocked_js[js][prio]) { - kctx->blocked_js[js][prio] = false; - - /* Only mark the slot as pullable if the context is not idle - - * that case is handled below */ - if (atomic_read(&kctx->atoms_pulled) && - kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); - } - - if (!atomic_read(&kctx->atoms_pulled)) { - if (!kctx->slots_pullable) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); - atomic_dec(&kbdev->js_data.nr_contexts_runnable); - timer_sync = true; - } - - if (kctx->as_nr != KBASEP_AS_NR_INVALID && - !kbase_ctx_flag(kctx, KCTX_DYING)) { - int num_slots = kbdev->gpu_props.num_job_slots; - int slot; - - if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) - kbasep_js_set_submit_allowed(js_devdata, kctx); - - for (slot = 0; slot < num_slots; slot++) { - if (kbase_js_ctx_pullable(kctx, slot, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, slot); - } - } - - kbase_jm_idle_ctx(kbdev, kctx); - - context_idle = true; - } - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - if (context_idle) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); - kbase_pm_context_idle(kbdev); - } - - if (timer_sync) - kbase_js_sync_timers(kbdev); - - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - - katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; - kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, - &retained_state); - - kbase_js_sched_all(kbdev); - - kbase_backend_complete_wq_post_sched(kbdev, core_req); -} - -void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - jsctx_rb_unpull(kctx, katom); - - WARN_ON(work_pending(&katom->work)); - - /* Block re-submission until workqueue has run */ - atomic_inc(&katom->blocked); - - kbase_job_check_leave_disjoint(kctx->kbdev, katom); - - INIT_WORK(&katom->work, js_return_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} - -bool kbase_js_complete_atom_wq(struct kbase_context *kctx, - struct kbase_jd_atom *katom) -{ - struct kbasep_js_kctx_info *js_kctx_info; - struct kbasep_js_device_data *js_devdata; - struct kbase_device *kbdev; - unsigned long flags; - bool timer_sync = false; - int atom_slot; - bool context_idle = false; - int prio = katom->sched_priority; - - kbdev = kctx->kbdev; - atom_slot = katom->slot_nr; - - js_kctx_info = &kctx->jctx.sched_info; - js_devdata = &kbdev->js_data; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - - mutex_lock(&js_devdata->runpool_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - context_idle = !atomic_dec_return(&kctx->atoms_pulled); - atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); - kctx->atoms_pulled_slot_pri[atom_slot][prio]--; - - if (!atomic_read(&kctx->atoms_pulled) && - !kctx->slots_pullable) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); - kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); - atomic_dec(&kbdev->js_data.nr_contexts_runnable); - timer_sync = true; - } - - /* If this slot has been blocked due to soft-stopped atoms, and - * all atoms have 
now been processed, then unblock the slot */ - if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] - && kctx->blocked_js[atom_slot][prio]) { - kctx->blocked_js[atom_slot][prio] = false; - if (kbase_js_ctx_pullable(kctx, atom_slot, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, atom_slot); - } - } - WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); - - if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && - jsctx_rb_none_to_pull(kctx, atom_slot)) { - if (!list_empty( - &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) - timer_sync |= kbase_js_ctx_list_remove_nolock( - kctx->kbdev, kctx, atom_slot); - } - - /* - * If submission is disabled on this context (most likely due to an - * atom failure) and there are now no atoms left in the system then - * re-enable submission so that context can be scheduled again. - */ - if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && - !atomic_read(&kctx->atoms_pulled) && - !kbase_ctx_flag(kctx, KCTX_DYING)) { - int js; - - kbasep_js_set_submit_allowed(js_devdata, kctx); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); - } - } else if (katom->x_post_dep && - kbasep_js_is_submit_allowed(js_devdata, kctx)) { - int js; - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, js); - } - } - - /* Mark context as inactive. The pm reference will be dropped later in - * jd_done_worker(). - */ - if (context_idle) - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - if (timer_sync) - kbase_backend_ctx_count_changed(kbdev); - mutex_unlock(&js_devdata->runpool_mutex); - - return context_idle; -} - -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp) -{ - struct kbase_device *kbdev; - struct kbase_context *kctx = katom->kctx; - struct kbase_jd_atom *x_dep = katom->x_post_dep; - - kbdev = kctx->kbdev; - - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - if (katom->will_fail_event_code) - katom->event_code = katom->will_fail_event_code; - - katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; - - if (katom->event_code != BASE_JD_EVENT_DONE) { - kbase_js_evict_deps(kctx, katom, katom->slot_nr, - katom->sched_priority); - } - -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, - katom->slot_nr), NULL, 0); -#endif - - kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); - - /* Unblock cross dependency if present */ - if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && - (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, - false); - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - kbase_js_move_to_tree(x_dep); - if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, - false)) - kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, - x_dep->slot_nr); - - if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) - return x_dep; - } - - return NULL; -} - -void kbase_js_sched(struct kbase_device *kbdev, int js_mask) -{ - struct kbasep_js_device_data *js_devdata; - struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; - bool timer_sync = false; - bool 
ctx_waiting[BASE_JM_MAX_NR_SLOTS]; - int js; - - js_devdata = &kbdev->js_data; - - down(&js_devdata->schedule_sem); - mutex_lock(&js_devdata->queue_mutex); - - for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - last_active[js] = kbdev->hwaccess.active_kctx[js]; - ctx_waiting[js] = false; - } - - while (js_mask) { - js = ffs(js_mask) - 1; - - while (1) { - struct kbase_context *kctx; - unsigned long flags; - bool context_idle = false; - - kctx = kbase_js_ctx_list_pop_head(kbdev, js); - - if (!kctx) { - js_mask &= ~(1 << js); - break; /* No contexts on pullable list */ - } - - if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { - context_idle = true; - - if (kbase_pm_context_active_handle_suspend( - kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { - /* Suspend pending - return context to - * queue and stop scheduling */ - mutex_lock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); - if (kbase_js_ctx_list_add_pullable_head( - kctx->kbdev, kctx, js)) - kbase_js_sync_timers(kbdev); - mutex_unlock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - up(&js_devdata->schedule_sem); - return; - } - kbase_ctx_flag_set(kctx, KCTX_ACTIVE); - } - - if (!kbase_js_use_ctx(kbdev, kctx, js)) { - mutex_lock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); - /* Context can not be used at this time */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbase_js_ctx_pullable(kctx, js, false) - || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) - timer_sync |= - kbase_js_ctx_list_add_pullable_head_nolock( - kctx->kbdev, kctx, js); - else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( - kctx->kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, - flags); - mutex_unlock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); - if (context_idle) { - WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); - kbase_pm_context_idle(kbdev); - } - - /* No more jobs can be submitted on this slot */ - js_mask &= ~(1 << js); - break; - } - mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbase_ctx_flag_clear(kctx, KCTX_PULLED); - - if (!kbase_jm_kick(kbdev, 1 << js)) - /* No more jobs can be submitted on this slot */ - js_mask &= ~(1 << js); - - if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { - bool pullable = kbase_js_ctx_pullable(kctx, js, - true); - - /* Failed to pull jobs - push to head of list. - * Unless this context is already 'active', in - * which case it's effectively already scheduled - * so push it to the back of the list. */ - if (pullable && kctx == last_active[js] && - kbase_ctx_flag(kctx, - (KCTX_PULLED_SINCE_ACTIVE_JS0 << - js))) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kctx->kbdev, - kctx, js); - else if (pullable) - timer_sync |= - kbase_js_ctx_list_add_pullable_head_nolock( - kctx->kbdev, - kctx, js); - else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( - kctx->kbdev, - kctx, js); - - /* If this context is not the active context, - * but the active context is pullable on this - * slot, then we need to remove the active - * marker to prevent it from submitting atoms in - * the IRQ handler, which would prevent this - * context from making progress. 
*/ - if (last_active[js] && kctx != last_active[js] - && kbase_js_ctx_pullable( - last_active[js], js, true)) - ctx_waiting[js] = true; - - if (context_idle) { - kbase_jm_idle_ctx(kbdev, kctx); - spin_unlock_irqrestore( - &kbdev->hwaccess_lock, - flags); - WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); - kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); - kbase_pm_context_idle(kbdev); - } else { - spin_unlock_irqrestore( - &kbdev->hwaccess_lock, - flags); - } - mutex_unlock( - &kctx->jctx.sched_info.ctx.jsctx_mutex); - - js_mask &= ~(1 << js); - break; /* Could not run atoms on this slot */ - } - - /* Push to back of list */ - if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( - kctx->kbdev, kctx, js); - else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( - kctx->kbdev, kctx, js); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); - } - } - - if (timer_sync) - kbase_js_sync_timers(kbdev); - - for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { - if (kbdev->hwaccess.active_kctx[js] == last_active[js] && - ctx_waiting[js]) - kbdev->hwaccess.active_kctx[js] = NULL; - } - - mutex_unlock(&js_devdata->queue_mutex); - up(&js_devdata->schedule_sem); -} - -void kbase_js_zap_context(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; - int js; - - /* - * Critical assumption: No more submission is possible outside of the - * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) - * whilst the struct kbase_context is terminating. - */ - - /* First, atomically do the following: - * - mark the context as dying - * - try to evict it from the queue */ - mutex_lock(&kctx->jctx.lock); - mutex_lock(&js_devdata->queue_mutex); - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbase_ctx_flag_set(kctx, KCTX_DYING); - - dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); - - /* - * At this point we know: - * - If eviction succeeded, it was in the queue, but now no - * longer is - * - We must cancel the jobs here. No Power Manager active reference to - * release. - * - This happens asynchronously - kbase_jd_zap_context() will wait for - * those jobs to be killed. - * - If eviction failed, then it wasn't in the queue. It is one - * of the following: - * - a. it didn't have any jobs, and so is not in the Queue or - * the Run Pool (not scheduled) - * - Hence, no more work required to cancel jobs. No Power Manager - * active reference to release. - * - b. it was in the middle of a scheduling transaction (and thus must - * have at least 1 job). This can happen from a syscall or a - * kernel thread. We still hold the jsctx_mutex, and so the thread - * must be waiting inside kbasep_js_try_schedule_head_ctx(), - * before checking whether the runpool is full. That thread will - * continue after we drop the mutex, and will notice the context - * is dying. It will rollback the transaction, killing all jobs at - * the same time. kbase_jd_zap_context() will wait for those jobs - * to be killed. - * - Hence, no more work required to cancel jobs, or to release the - * Power Manager active reference. - * - c. it is scheduled, and may or may not be running jobs - * - We must cause it to leave the runpool by stopping it from - * submitting any more jobs. 
When it finally does leave, - * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs - * (because it is dying), release the Power Manager active reference, - * and will not requeue the context in the queue. - * kbase_jd_zap_context() will wait for those jobs to be killed. - * - Hence, work required just to make it leave the runpool. Cancelling - * jobs and releasing the Power manager active reference will be - * handled when it leaves the runpool. - */ - if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - if (!list_empty( - &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) - list_del_init( - &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* The following events require us to kill off remaining jobs - * and update PM book-keeping: - * - we evicted it correctly (it must have jobs to be in the - * Queue) - * - * These events need no action, but take this path anyway: - * - Case a: it didn't have any jobs, and was never in the Queue - * - Case b: scheduling transaction will be partially rolled- - * back (this already cancels the jobs) - */ - - KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); - - /* Only cancel jobs when we evicted from the - * queue. No Power Manager active reference was held. - * - * Having is_dying set ensures that this kills, and - * doesn't requeue */ - kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); - - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&kctx->jctx.lock); - } else { - unsigned long flags; - bool was_retained; - - /* Case c: didn't evict, but it is scheduled - it's in the Run - * Pool */ - KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, - kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); - - /* Disable the ctx from submitting any more jobs */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - kbasep_js_clear_submit_allowed(js_devdata, kctx); - - /* Retain and (later) release the context whilst it is is now - * disallowed from submitting jobs - ensures that someone - * somewhere will be removing the context later on */ - was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); - - /* Since it's scheduled and we have the jsctx_mutex, it must be - * retained successfully */ - KBASE_DEBUG_ASSERT(was_retained); - - dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); - - /* Cancel any remaining running jobs for this kctx - if any. - * Submit is disallowed which takes effect immediately, so no - * more new jobs will appear after we do this. 
*/ - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - kbase_job_slot_hardstop(kctx, js, NULL); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&kctx->jctx.lock); - - dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", - kctx); - - kbasep_js_runpool_release_ctx(kbdev, kctx); - } - - KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); - - /* After this, you must wait on both the - * kbase_jd_context::zero_jobs_wait and the - * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs - * to be destroyed, and the context to be de-scheduled (if it was on the - * runpool). - * - * kbase_jd_zap_context() will do this. */ -} - -static inline int trace_get_refcnt(struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - return atomic_read(&kctx->refcount); -} - -/** - * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context - * @kctx: Pointer to context. - * @callback: Pointer to function to call for each job. - * - * Call a function on all jobs belonging to a non-queued, non-running - * context, and detach the jobs from the context as it goes. - * - * Due to the locks that might be held at the time of the call, the callback - * may need to defer work on a workqueue to complete its actions (e.g. when - * cancelling jobs) - * - * Atoms will be removed from the queue, so this must only be called when - * cancelling jobs (which occurs as part of context destruction). - * - * The locking conditions on the caller are as follows: - * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. - */ -static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, - kbasep_js_ctx_job_cb callback) -{ - struct kbase_device *kbdev; - unsigned long flags; - u32 js; - - kbdev = kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, - 0u, trace_get_refcnt(kbdev, kctx)); - - /* Invoke callback on jobs on each slot in turn */ - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_queue_foreach(kctx, js, callback); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_js.h deleted file mode 100755 index 355da27edc1b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js.h +++ /dev/null @@ -1,912 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_js.h - * Job Scheduler APIs. 
- */ - -#ifndef _KBASE_JS_H_ -#define _KBASE_JS_H_ - -#include "mali_kbase_js_defs.h" -#include "mali_kbase_context.h" -#include "mali_kbase_defs.h" -#include "mali_kbase_debug.h" - -#include "mali_kbase_js_ctx_attr.h" - -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js Job Scheduler Internal APIs - * @{ - * - * These APIs are Internal to KBase. - */ - -/** - * @brief Initialize the Job Scheduler - * - * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero - * initialized before passing to the kbasep_js_devdata_init() function. This is - * to give efficient error path code. - */ -int kbasep_js_devdata_init(struct kbase_device * const kbdev); - -/** - * @brief Halt the Job Scheduler. - * - * It is safe to call this on \a kbdev even if it the kbasep_js_device_data - * sub-structure was never initialized/failed initialization, to give efficient - * error-path code. - * - * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must - * be zero initialized before passing to the kbasep_js_devdata_init() - * function. This is to give efficient error path code. - * - * It is a Programming Error to call this whilst there are still kbase_context - * structures registered with this scheduler. - * - */ -void kbasep_js_devdata_halt(struct kbase_device *kbdev); - -/** - * @brief Terminate the Job Scheduler - * - * It is safe to call this on \a kbdev even if it the kbasep_js_device_data - * sub-structure was never initialized/failed initialization, to give efficient - * error-path code. - * - * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must - * be zero initialized before passing to the kbasep_js_devdata_init() - * function. This is to give efficient error path code. - * - * It is a Programming Error to call this whilst there are still kbase_context - * structures registered with this scheduler. - */ -void kbasep_js_devdata_term(struct kbase_device *kbdev); - -/** - * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler. - * - * This effectively registers a struct kbase_context with a Job Scheduler. - * - * It does not register any jobs owned by the struct kbase_context with the scheduler. - * Those must be separately registered by kbasep_js_add_job(). - * - * The struct kbase_context must be zero intitialized before passing to the - * kbase_js_init() function. This is to give efficient error path code. - */ -int kbasep_js_kctx_init(struct kbase_context * const kctx); - -/** - * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler - * - * This effectively de-registers a struct kbase_context from its Job Scheduler - * - * It is safe to call this on a struct kbase_context that has never had or failed - * initialization of its jctx.sched_info member, to give efficient error-path - * code. - * - * For this to work, the struct kbase_context must be zero intitialized before passing - * to the kbase_js_init() function. - * - * It is a Programming Error to call this whilst there are still jobs - * registered with this context. - */ -void kbasep_js_kctx_term(struct kbase_context *kctx); - -/** - * @brief Add a job chain to the Job Scheduler, and take necessary actions to - * schedule the context/run the job. 
- * - * This atomically does the following: - * - Update the numbers of jobs information - * - Add the job to the run pool if necessary (part of init_job) - * - * Once this is done, then an appropriate action is taken: - * - If the ctx is scheduled, it attempts to start the next job (which might be - * this added job) - * - Otherwise, and if this is the first job on the context, it enqueues it on - * the Policy Queue - * - * The Policy's Queue can be updated by this in the following ways: - * - In the above case that this is the first job on the context - * - If the context is high priority and the context is not scheduled, then it - * could cause the Policy to schedule out a low-priority context, allowing - * this context to be scheduled in. - * - * If the context is already scheduled on the RunPool, then adding a job to it - * is guarenteed not to update the Policy Queue. And so, the caller is - * guarenteed to not need to try scheduling a context from the Run Pool - it - * can safely assert that the result is false. - * - * It is a programming error to have more than U32_MAX jobs in flight at a time. - * - * The following locking conditions are made on the caller: - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold hwaccess_lock (as this will be obtained internally) - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). - * - * @return true indicates that the Policy Queue was updated, and so the - * caller will need to try scheduling a context onto the Run Pool. - * @return false indicates that no updates were made to the Policy Queue, - * so no further action is required from the caller. This is \b always returned - * when the context is currently scheduled. - */ -bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); - -/** - * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'. - * - * Completely removing a job requires several calls: - * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of - * the atom - * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler - * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the - * remaining state held as part of the job having been run. - * - * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions. - * - * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. - * - * It is a programming error to call this when: - * - \a atom is not a job belonging to kctx. - * - \a atom has already been removed from the Job Scheduler. - * - \a atom is still in the runpool - * - * Do not use this for removing jobs being killed by kbase_jd_cancel() - use - * kbasep_js_remove_cancelled_job() instead. - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - */ -void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom); - -/** - * @brief Completely remove a job chain from the Job Scheduler, in the case - * where the job chain was cancelled. - * - * This is a variant of kbasep_js_remove_job() that takes care of removing all - * of the retained state too. 
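As a point of reference, the three-call removal sequence documented for kbasep_js_remove_job() can be sketched as follows. This is illustrative only: the wrapper name is invented, locking and error handling are elided, and the retained-state copy uses the kbasep_js_atom_retained_state_copy() helper defined later in this header (the text above refers to it as kbasep_js_copy_atom_retained_state()).

static void example_remove_completed_job(struct kbase_device *kbdev,
					 struct kbase_context *kctx,
					 struct kbase_jd_atom *katom)
{
	struct kbasep_js_atom_retained_state retained;

	/* 1. Capture the state that must outlive the atom */
	kbasep_js_atom_retained_state_copy(&retained, katom);

	/* 2. Partially remove the atom from the Job Scheduler
	 * (the caller holds jsctx_mutex, as documented above) */
	kbasep_js_remove_job(kbdev, kctx, katom);

	/* 3. Release the remaining state held because the job ran */
	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
							       &retained);
}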
This is generally useful for cancelled atoms, - * which need not be handled in an optimal way. - * - * It is a programming error to call this when: - * - \a atom is not a job belonging to kctx. - * - \a atom has already been removed from the Job Scheduler. - * - \a atom is still in the runpool: - * - it is not being killed with kbasep_jd_cancel() - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold the hwaccess_lock, (as this will be obtained - * internally) - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be - * obtained internally) - * - * @return true indicates that ctx attributes have changed and the caller - * should call kbase_js_sched_all() to try to run more jobs - * @return false otherwise - */ -bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, - struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * @brief Refcount a context as being busy, preventing it from being scheduled - * out. - * - * @note This function can safely be called from IRQ context. - * - * The following locking conditions are made on the caller: - * - it must \em not hold mmu_hw_mutex and hwaccess_lock, because they will be - * used internally. - * - * @return value != false if the retain succeeded, and the context will not be scheduled out. - * @return false if the retain failed (because the context is being/has been scheduled out). - */ -bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Refcount a context as being busy, preventing it from being scheduled - * out. - * - * @note This function can safely be called from IRQ context. - * - * The following locks must be held by the caller: - * - mmu_hw_mutex, hwaccess_lock - * - * @return value != false if the retain succeeded, and the context will not be scheduled out. - * @return false if the retain failed (because the context is being/has been scheduled out). - */ -bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Lookup a context in the Run Pool based upon its current address space - * and ensure that is stays scheduled in. - * - * The context is refcounted as being busy to prevent it from scheduling - * out. It must be released with kbasep_js_runpool_release_ctx() when it is no - * longer required to stay scheduled in. - * - * @note This function can safely be called from IRQ context. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * If the hwaccess_lock is already held, then the caller should use - * kbasep_js_runpool_lookup_ctx_nolock() instead. - * - * @return a valid struct kbase_context on success, which has been refcounted as being busy. - * @return NULL on failure, indicating that no context was found in \a as_nr - */ -struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr); - -/** - * @brief Handling the requeuing/killing of a context that was evicted from the - * policy queue or runpool. - * - * This should be used whenever handing off a context that has been evicted - * from the policy queue or the runpool: - * - If the context is not dying and has jobs, it gets re-added to the policy - * queue - * - Otherwise, it is not added - * - * In addition, if the context is dying the jobs are killed asynchronously. 
- * - * In all cases, the Power Manager active reference is released - * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a - * has_pm_ref must be set to false whenever the context was not previously in - * the runpool and does not hold a Power Manager active refcount. Note that - * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an - * active refcount even though they weren't in the runpool. - * - * The following locking conditions are made on the caller: - * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be - * obtained internally) - */ -void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref); - -/** - * @brief Release a refcount of a context being busy, allowing it to be - * scheduled out. - * - * When the refcount reaches zero and the context \em might be scheduled out - * (depending on whether the Scheudling Policy has deemed it so, or if it has run - * out of jobs). - * - * If the context does get scheduled out, then The following actions will be - * taken as part of deschduling a context: - * - For the context being descheduled: - * - If the context is in the processing of dying (all the jobs are being - * removed from it), then descheduling also kills off any jobs remaining in the - * context. - * - If the context is not dying, and any jobs remain after descheduling the - * context then it is re-enqueued to the Policy's Queue. - * - Otherwise, the context is still known to the scheduler, but remains absent - * from the Policy Queue until a job is next added to it. - * - In all descheduling cases, the Power Manager active reference (obtained - * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()). - * - * Whilst the context is being descheduled, this also handles actions that - * cause more atoms to be run: - * - Attempt submitting atoms when the Context Attributes on the Runpool have - * changed. This is because the context being scheduled out could mean that - * there are more opportunities to run atoms. - * - Attempt submitting to a slot that was previously blocked due to affinity - * restrictions. This is usually only necessary when releasing a context - * happens as part of completing a previous job, but is harmless nonetheless. - * - Attempt scheduling in a new context (if one is available), and if necessary, - * running a job from that new context. - * - * Unlike retaining a context in the runpool, this function \b cannot be called - * from IRQ context. - * - * It is a programming error to call this on a \a kctx that is not currently - * scheduled, or that already has a zero refcount. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be - * obtained internally) - * - */ -void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional - * actions from completing an atom. 
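A minimal sketch of the lookup/retain-then-release pairing described above; the function name is invented and the work done while the context is pinned is elided.

static void example_with_pinned_ctx(struct kbase_device *kbdev, int as_nr)
{
	struct kbase_context *kctx;

	/* Refcounts the context as busy so it cannot be scheduled out.
	 * The caller must not already hold hwaccess_lock. */
	kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_nr);
	if (!kctx)
		return;	/* no context bound to this address space */

	/* ... operate on kctx while it is guaranteed to stay in the runpool ... */

	/* Drop the busy refcount; this may deschedule the context and must
	 * not be called from IRQ context. */
	kbasep_js_runpool_release_ctx(kbdev, kctx);
}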
- * - * This is usually called as part of completing an atom and releasing the - * refcount on the context held by the atom. - * - * Therefore, the extra actions carried out are part of handling actions queued - * on a completed atom, namely: - * - Releasing the atom's context attributes - * - Retrying the submission on a particular slot, because we couldn't submit - * on that slot from an IRQ handler. - * - * The locking conditions of this function are the same as those for - * kbasep_js_runpool_release_ctx() - */ -void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); - -/** - * @brief Variant of kbase_js_runpool_release_ctx() that assumes that - * kbasep_js_device_data::runpool_mutex and - * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not - * attempt to schedule new contexts. - */ -void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx); - -/** - * @brief Schedule in a privileged context - * - * This schedules a context in regardless of the context priority. - * If the runpool is full, a context will be forced out of the runpool and the function will wait - * for the new context to be scheduled in. - * The context will be kept scheduled in (and the corresponding address space reserved) until - * kbasep_js_release_privileged_ctx is called). - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will - * be used internally. - * - */ -void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Release a privileged context, allowing it to be scheduled out. - * - * See kbasep_js_runpool_release_ctx for potential side effects. - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. - * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be - * obtained internally) - * - it must \em not hold the kbase_device::mmu_hw_mutex (as this will be - * obtained internally) - * - */ -void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * @brief Try to submit the next job on each slot - * - * The following locks may be used: - * - kbasep_js_device_data::runpool_mutex - * - hwaccess_lock - */ -void kbase_js_try_run_jobs(struct kbase_device *kbdev); - -/** - * @brief Suspend the job scheduler during a Power Management Suspend event. - * - * Causes all contexts to be removed from the runpool, and prevents any - * contexts from (re)entering the runpool. - * - * This does not handle suspending the one privileged context: the caller must - * instead do this by by suspending the GPU HW Counter Instrumentation. - * - * This will eventually cause all Power Management active references held by - * contexts on the runpool to be released, without running any more atoms. 
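For illustration, the privileged-context bracket documented above could be used along these lines (sketch only; the helper name and the elided work are assumptions).

static void example_with_privileged_ctx(struct kbase_device *kbdev,
					struct kbase_context *kctx)
{
	/* Forces kctx into the runpool regardless of priority and keeps it
	 * (and its address space) scheduled in, waiting if the runpool is
	 * full. */
	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	/* ... do work that requires kctx to stay scheduled in ... */

	/* Allow kctx to be scheduled out again. */
	kbasep_js_release_privileged_ctx(kbdev, kctx);
}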
- * - * The caller must then wait for all Power Mangement active refcount to become - * zero before completing the suspend. - * - * The emptying mechanism may take some time to complete, since it can wait for - * jobs to complete naturally instead of forcing them to end quickly. However, - * this is bounded by the Job Scheduler's Job Timeouts. Hence, this - * function is guaranteed to complete in a finite time. - */ -void kbasep_js_suspend(struct kbase_device *kbdev); - -/** - * @brief Resume the Job Scheduler after a Power Management Resume event. - * - * This restores the actions from kbasep_js_suspend(): - * - Schedules contexts back into the runpool - * - Resumes running atoms on the GPU - */ -void kbasep_js_resume(struct kbase_device *kbdev); - -/** - * @brief Submit an atom to the job scheduler. - * - * The atom is enqueued on the context's ringbuffer. The caller must have - * ensured that all dependencies can be represented in the ringbuffer. - * - * Caller must hold jctx->lock - * - * @param[in] kctx Context pointer - * @param[in] atom Pointer to the atom to submit - * - * @return Whether the context requires to be enqueued. */ -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. - * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. - * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); -/** - * @brief Pull an atom from a context in the job scheduler for execution. - * - * The atom will not be removed from the ringbuffer at this stage. - * - * The HW access lock must be held when calling this function. - * - * @param[in] kctx Context to pull from - * @param[in] js Job slot to pull from - * @return Pointer to an atom, or NULL if there are no atoms for this - * slot that can be currently run. - */ -struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); - -/** - * @brief Return an atom to the job scheduler ringbuffer. - * - * An atom is 'unpulled' if execution is stopped but intended to be returned to - * later. The most common reason for this is that the atom has been - * soft-stopped. - * - * Note that if multiple atoms are to be 'unpulled', they must be returned in - * the reverse order to which they were originally pulled. It is a programming - * error to return atoms in any other order. - * - * The HW access lock must be held when calling this function. - * - * @param[in] kctx Context pointer - * @param[in] atom Pointer to the atom to unpull - */ -void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); - -/** - * @brief Complete an atom from jd_done_worker(), removing it from the job - * scheduler ringbuffer. - * - * If the atom failed then all dependee atoms marked for failure propagation - * will also fail. - * - * @param[in] kctx Context pointer - * @param[in] katom Pointer to the atom to complete - * @return true if the context is now idle (no jobs pulled) - * false otherwise - */ -bool kbase_js_complete_atom_wq(struct kbase_context *kctx, - struct kbase_jd_atom *katom); - -/** - * @brief Complete an atom. - * - * Most of the work required to complete an atom will be performed by - * jd_done_worker(). 
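To make the reverse-order rule for kbase_js_unpull() concrete, a sketch follows. It is illustrative only: the "cannot submit after all" condition and the function name are placeholders, and the caller is assumed to hold hwaccess_lock as documented for kbase_js_pull() and kbase_js_unpull().

static void example_pull_then_back_off(struct kbase_context *kctx, int js)
{
	struct kbase_jd_atom *first, *second;

	/* hwaccess_lock is held by the caller (see above) */
	first = kbase_js_pull(kctx, js);
	if (!first)
		return;

	second = kbase_js_pull(kctx, js);

	/* Suppose neither atom can be submitted after all: atoms must be
	 * returned in the reverse order to which they were pulled. */
	if (second)
		kbase_js_unpull(kctx, second);
	kbase_js_unpull(kctx, first);
}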
- * - * The HW access lock must be held when calling this function. - * - * @param[in] katom Pointer to the atom to complete - * @param[in] end_timestamp The time that the atom completed (may be NULL) - * - * Return: Atom that has now been unblocked and can now be run, or NULL if none - */ -struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, - ktime_t *end_timestamp); - -/** - * @brief Submit atoms from all available contexts. - * - * This will attempt to submit as many jobs as possible to the provided job - * slots. It will exit when either all job slots are full, or all contexts have - * been used. - * - * @param[in] kbdev Device pointer - * @param[in] js_mask Mask of job slots to submit to - */ -void kbase_js_sched(struct kbase_device *kbdev, int js_mask); - -/** - * kbase_jd_zap_context - Attempt to deschedule a context that is being - * destroyed - * @kctx: Context pointer - * - * This will attempt to remove a context from any internal job scheduler queues - * and perform any other actions to ensure a context will not be submitted - * from. - * - * If the context is currently scheduled, then the caller must wait for all - * pending jobs to complete before taking any further action. - */ -void kbase_js_zap_context(struct kbase_context *kctx); - -/** - * @brief Validate an atom - * - * This will determine whether the atom can be scheduled onto the GPU. Atoms - * with invalid combinations of core requirements will be rejected. - * - * @param[in] kbdev Device pointer - * @param[in] katom Atom to validate - * @return true if atom is valid - * false otherwise - */ -bool kbase_js_is_atom_valid(struct kbase_device *kbdev, - struct kbase_jd_atom *katom); - -/** - * kbase_js_set_timeouts - update all JS timeouts with user specified data - * @kbdev: Device pointer - * - * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is - * set to a positive number then that becomes the new value used, if a timeout - * is negative then the default is set. - */ -void kbase_js_set_timeouts(struct kbase_device *kbdev); - -/** - * kbase_js_set_ctx_priority - set the context priority - * @kctx: Context pointer - * @new_priority: New priority value for the Context - * - * The context priority is set to a new value and it is moved to the - * pullable/unpullable list as per the new priority. - */ -void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); - - -/** - * kbase_js_update_ctx_priority - update the context priority - * @kctx: Context pointer - * - * The context priority gets updated as per the priority of atoms currently in - * use for that context, but only if system priority mode for context scheduling - * is being used. - */ -void kbase_js_update_ctx_priority(struct kbase_context *kctx); - -/* - * Helpers follow - */ - -/** - * @brief Check that a context is allowed to submit jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * As with any bool, never test the return value with true. - * - * The caller must hold hwaccess_lock. 
- */ -static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 test_bit; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - test_bit = (u16) (1u << kctx->as_nr); - - return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); -} - -/** - * @brief Allow a context to submit jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * The caller must hold hwaccess_lock. - */ -static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 set_bit; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - set_bit = (u16) (1u << kctx->as_nr); - - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", - kctx, kctx->as_nr); - - js_devdata->runpool_irq.submit_allowed |= set_bit; -} - -/** - * @brief Prevent a context from submitting more jobs on this policy - * - * The purpose of this abstraction is to hide the underlying data size, and wrap up - * the long repeated line of code. - * - * The caller must hold hwaccess_lock. - */ -static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) -{ - u16 clear_bit; - u16 clear_mask; - - /* Ensure context really is scheduled in */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - clear_bit = (u16) (1u << kctx->as_nr); - clear_mask = ~clear_bit; - - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", - kctx, kctx->as_nr); - - js_devdata->runpool_irq.submit_allowed &= clear_mask; -} - -/** - * Create an initial 'invalid' atom retained state, that requires no - * atom-related work to be done on releasing with - * kbasep_js_runpool_release_ctx_and_katom_retained_state() - */ -static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) -{ - retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; - retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; -} - -/** - * Copy atom state that can be made available after jd_done_nolock() is called - * on that atom. - */ -static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom) -{ - retained_state->event_code = katom->event_code; - retained_state->core_req = katom->core_req; - retained_state->sched_priority = katom->sched_priority; - retained_state->device_nr = katom->device_nr; -} - -/** - * @brief Determine whether an atom has finished (given its retained state), - * and so should be given back to userspace/removed from the system. 
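The hwaccess_lock requirement for the submit-allowed helpers above can be illustrated with a small sketch (hypothetical caller; what is done when submission is allowed is elided).

static void example_check_submit_allowed(struct kbase_device *kbdev,
					 struct kbase_context *kctx)
{
	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	/* Valid only while kctx is scheduled in and the lock is held */
	if (kbasep_js_is_submit_allowed(js_devdata, kctx)) {
		/* ... safe to try submitting more work for kctx ... */
	}

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}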
- * - * Reasons for an atom not finishing include: - * - Being soft-stopped (and so, the atom should be resubmitted sometime later) - * - * @param[in] katom_retained_state the retained state of the atom to check - * @return false if the atom has not finished - * @return !=false if the atom has finished - */ -static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) -{ - return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); -} - -/** - * @brief Determine whether a struct kbasep_js_atom_retained_state is valid - * - * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the - * code should just ignore it. - * - * @param[in] katom_retained_state the atom's retained state to check - * @return false if the retained state is invalid, and can be ignored - * @return !=false if the retained state is valid - */ -static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state) -{ - return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); -} - -/** - * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the - * context is guaranteed to be already previously retained. - * - * It is a programming error to supply the \a as_nr of a context that has not - * been previously retained/has a busy refcount of zero. The only exception is - * when there is no ctx in \a as_nr (NULL returned). - * - * The following locking conditions are made on the caller: - * - it must \em not hold the hwaccess_lock, because it will be used internally. - * - * @return a valid struct kbase_context on success, with a refcount that is guaranteed - * to be non-zero and unmodified by this function. - * @return NULL on failure, indicating that no context was found in \a as_nr - */ -static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr) -{ - struct kbase_context *found_kctx; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); - - found_kctx = kbdev->as_to_kctx[as_nr]; - KBASE_DEBUG_ASSERT(found_kctx == NULL || - atomic_read(&found_kctx->refcount) > 0); - - return found_kctx; -} - -/* - * The following locking conditions are made on the caller: - * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. - * - The caller must hold the kbasep_js_device_data::runpool_mutex - */ -static inline void kbase_js_runpool_inc_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - - /* Track total contexts */ - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); - ++(js_devdata->nr_all_contexts_running); - - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* Track contexts that can submit jobs */ - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < - S8_MAX); - ++(js_devdata->nr_user_contexts_running); - } -} - -/* - * The following locking conditions are made on the caller: - * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
- * - The caller must hold the kbasep_js_device_data::runpool_mutex - */ -static inline void kbase_js_runpool_dec_context_count( - struct kbase_device *kbdev, - struct kbase_context *kctx) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&js_devdata->runpool_mutex); - - /* Track total contexts */ - --(js_devdata->nr_all_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); - - if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { - /* Track contexts that can submit jobs */ - --(js_devdata->nr_user_contexts_running); - KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); - } -} - - -/** - * @brief Submit atoms from all available contexts to all job slots. - * - * This will attempt to submit as many jobs as possible. It will exit when - * either all job slots are full, or all contexts have been used. - * - * @param[in] kbdev Device pointer - */ -static inline void kbase_js_sched_all(struct kbase_device *kbdev) -{ - kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); -} - -extern const int -kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; - -extern const base_jd_prio -kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - -/** - * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) - * to relative ordering - * @atom_prio: Priority ID to translate. - * - * Atom priority values for @ref base_jd_prio cannot be compared directly to - * find out which are higher or lower. - * - * This function will convert base_jd_prio values for successively lower - * priorities into a monotonically increasing sequence. That is, the lower the - * base_jd_prio priority, the higher the value produced by this function. This - * is in accordance with how the rest of the kernel treates priority. - * - * The mapping is 1:1 and the size of the valid input range is the same as the - * size of the valid output range, i.e. - * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS - * - * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions - * - * Return: On success: a value in the inclusive range - * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: - * KBASE_JS_ATOM_SCHED_PRIO_INVALID - */ -static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) -{ - if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) - return KBASE_JS_ATOM_SCHED_PRIO_INVALID; - - return kbasep_js_atom_priority_to_relative[atom_prio]; -} - -static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) -{ - unsigned int prio_idx; - - KBASE_DEBUG_ASSERT(0 <= sched_prio - && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); - - prio_idx = (unsigned int)sched_prio; - - return kbasep_js_relative_priority_to_atom[prio_idx]; -} - - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_JS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_ctx_attr.c deleted file mode 100755 index 1ff230cc222d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_ctx_attr.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#include -#include - -/* - * Private functions follow - */ - -/** - * @brief Check whether a ctx has a certain attribute, and if so, retain that - * attribute on the runpool. - * - * Requires: - * - jsctx mutex - * - runpool_irq spinlock - * - ctx is scheduled on the runpool - * - * @return true indicates a change in ctx attributes state of the runpool. - * In this state, the scheduler might be able to submit more jobs than - * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() - * or similar is called sometime later. - * @return false indicates no change in ctx attributes state of the runpool. - */ -static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - bool runpool_state_changed = false; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { - KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); - ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); - - if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { - /* First refcount indicates a state change */ - runpool_state_changed = true; - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); - } - } - - return runpool_state_changed; -} - -/** - * @brief Check whether a ctx has a certain attribute, and if so, release that - * attribute on the runpool. - * - * Requires: - * - jsctx mutex - * - runpool_irq spinlock - * - ctx is scheduled on the runpool - * - * @return true indicates a change in ctx attributes state of the runpool. - * In this state, the scheduler might be able to submit more jobs than - * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() - * or similar is called sometime later. - * @return false indicates no change in ctx attributes state of the runpool. 
- */ -static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -{ - struct kbasep_js_device_data *js_devdata; - struct kbasep_js_kctx_info *js_kctx_info; - bool runpool_state_changed = false; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); - js_devdata = &kbdev->js_data; - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); - KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - - if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { - KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); - --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); - - if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { - /* Last de-refcount indicates a state change */ - runpool_state_changed = true; - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); - } - } - - return runpool_state_changed; -} - -/** - * @brief Retain a certain attribute on a ctx, also retaining it on the runpool - * if the context is scheduled. - * - * Requires: - * - jsctx mutex - * - If the context is scheduled, then runpool_irq spinlock must also be held - * - * @return true indicates a change in ctx attributes state of the runpool. - * This may allow the scheduler to submit more jobs than previously. - * @return false indicates no change in ctx attributes state of the runpool. - */ -static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -{ - struct kbasep_js_kctx_info *js_kctx_info; - bool runpool_state_changed = false; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&kbdev->hwaccess_lock); - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); - - ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); - - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { - /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); - runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); - } - - return runpool_state_changed; -} - -/* - * @brief Release a certain attribute on a ctx, also releasing it from the runpool - * if the context is scheduled. - * - * Requires: - * - jsctx mutex - * - If the context is scheduled, then runpool_irq spinlock must also be held - * - * @return true indicates a change in ctx attributes state of the runpool. - * This may allow the scheduler to submit more jobs than previously. - * @return false indicates no change in ctx attributes state of the runpool. 
- */ -static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -{ - struct kbasep_js_kctx_info *js_kctx_info; - bool runpool_state_changed = false; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); - js_kctx_info = &kctx->jctx.sched_info; - - lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); - - if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { - lockdep_assert_held(&kbdev->hwaccess_lock); - /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ - runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); - KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); - } - - /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ - --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); - - return runpool_state_changed; -} - -/* - * More commonly used public functions - */ - -void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -{ - bool runpool_state_changed; - int i; - - /* Retain any existing attributes */ - for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { - if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { - /* The context is being scheduled in, so update the runpool with the new attributes */ - runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); - - /* We don't need to know about state changed, because retaining a - * context occurs on scheduling it, and that itself will also try - * to run new atoms */ - CSTD_UNUSED(runpool_state_changed); - } - } -} - -bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) -{ - bool runpool_state_changed = false; - int i; - - /* Release any existing attributes */ - for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { - if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { - /* The context is being scheduled out, so update the runpool on the removed attributes */ - runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); - } - } - - return runpool_state_changed; -} - -void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - bool runpool_state_changed = false; - base_jd_core_req core_req; - - KBASE_DEBUG_ASSERT(katom); - core_req = katom->core_req; - - if (core_req & BASE_JD_REQ_ONLY_COMPUTE) - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); - else - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); - - if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { - /* Atom that can run on slot1 or slot2, and can use all cores */ - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); - } - - /* We don't need to know about state changed, because retaining an - * atom occurs on adding it, and that itself will also try to run - * new 
atoms */ - CSTD_UNUSED(runpool_state_changed); -} - -bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) -{ - bool runpool_state_changed = false; - base_jd_core_req core_req; - - KBASE_DEBUG_ASSERT(katom_retained_state); - core_req = katom_retained_state->core_req; - - /* No-op for invalid atoms */ - if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) - return false; - - if (core_req & BASE_JD_REQ_ONLY_COMPUTE) - runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); - else - runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); - - if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { - /* Atom that can run on slot1 or slot2, and can use all cores */ - runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); - } - - return runpool_state_changed; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_ctx_attr.h deleted file mode 100755 index 25fd39787c71..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_ctx_attr.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_js_ctx_attr.h - * Job Scheduler Context Attribute APIs - */ - -#ifndef _KBASE_JS_CTX_ATTR_H_ -#define _KBASE_JS_CTX_ATTR_H_ - -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js - * @{ - */ - -/** - * Retain all attributes of a context - * - * This occurs on scheduling in the context on the runpool (but after - * is_scheduled is set) - * - * Requires: - * - jsctx mutex - * - runpool_irq spinlock - * - ctx->is_scheduled is true - */ -void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * Release all attributes of a context - * - * This occurs on scheduling out the context from the runpool (but before - * is_scheduled is cleared) - * - * Requires: - * - jsctx mutex - * - runpool_irq spinlock - * - ctx->is_scheduled is true - * - * @return true indicates a change in ctx attributes state of the runpool. - * In this state, the scheduler might be able to submit more jobs than - * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() - * or similar is called sometime later. - * @return false indicates no change in ctx attributes state of the runpool. 
- */ -bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); - -/** - * Retain all attributes of an atom - * - * This occurs on adding an atom to a context - * - * Requires: - * - jsctx mutex - * - If the context is scheduled, then runpool_irq spinlock must also be held - */ -void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); - -/** - * Release all attributes of an atom, given its retained state. - * - * This occurs after (permanently) removing an atom from a context - * - * Requires: - * - jsctx mutex - * - If the context is scheduled, then runpool_irq spinlock must also be held - * - * This is a no-op when \a katom_retained_state is invalid. - * - * @return true indicates a change in ctx attributes state of the runpool. - * In this state, the scheduler might be able to submit more jobs than - * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() - * or similar is called sometime later. - * @return false indicates no change in ctx attributes state of the runpool. - */ -bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); - -/** - * Requires: - * - runpool_irq spinlock - */ -static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) -{ - struct kbasep_js_device_data *js_devdata; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); - js_devdata = &kbdev->js_data; - - return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; -} - -/** - * Requires: - * - runpool_irq spinlock - */ -static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) -{ - /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ - return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); -} - -/** - * Requires: - * - jsctx mutex - */ -static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) -{ - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); - js_kctx_info = &kctx->jctx.sched_info; - - /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ - return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); -} - - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_defs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_defs.h deleted file mode 100755 index 052a0b368315..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_js_defs.h +++ /dev/null @@ -1,416 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_js.h - * Job Scheduler Type Definitions - */ - -#ifndef _KBASE_JS_DEFS_H_ -#define _KBASE_JS_DEFS_H_ - -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js - * @{ - */ -/* Forward decls */ -struct kbase_device; -struct kbase_jd_atom; - - -typedef u32 kbase_context_flags; - -struct kbasep_atom_req { - base_jd_core_req core_req; - kbase_context_flags ctx_req; - u32 device_nr; -}; - -/** Callback function run on all of a context's jobs registered with the Job - * Scheduler */ -typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); - -/** - * @brief Maximum number of jobs that can be submitted to a job slot whilst - * inside the IRQ handler. - * - * This is important because GPU NULL jobs can complete whilst the IRQ handler - * is running. Otherwise, it potentially allows an unlimited number of GPU NULL - * jobs to be submitted inside the IRQ handler, which increases IRQ latency. - */ -#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 - -/** - * @brief Context attributes - * - * Each context attribute can be thought of as a boolean value that caches some - * state information about either the runpool, or the context: - * - In the case of the runpool, it is a cache of "Do any contexts owned by - * the runpool have attribute X?" - * - In the case of a context, it is a cache of "Do any atoms owned by the - * context have attribute X?" - * - * The boolean value of the context attributes often affect scheduling - * decisions, such as affinities to use and job slots to use. - * - * To accomodate changes of state in the context, each attribute is refcounted - * in the context, and in the runpool for all running contexts. Specifically: - * - The runpool holds a refcount of how many contexts in the runpool have this - * attribute. - * - The context holds a refcount of how many atoms have this attribute. - */ -enum kbasep_js_ctx_attr { - /** Attribute indicating a context that contains Compute jobs. That is, - * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE - * - * @note A context can be both 'Compute' and 'Non Compute' if it contains - * both types of jobs. - */ - KBASEP_JS_CTX_ATTR_COMPUTE, - - /** Attribute indicating a context that contains Non-Compute jobs. That is, - * the context has some jobs that are \b not of type @ref - * BASE_JD_REQ_ONLY_COMPUTE. - * - * @note A context can be both 'Compute' and 'Non Compute' if it contains - * both types of jobs. - */ - KBASEP_JS_CTX_ATTR_NON_COMPUTE, - - /** Attribute indicating that a context contains compute-job atoms that - * aren't restricted to a coherent group, and can run on all cores. - * - * Specifically, this is when the atom's \a core_req satisfy: - * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 - * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups - * - * Such atoms could be blocked from running if one of the coherent groups - * is being used by another job slot, so tracking this context attribute - * allows us to prevent such situations. 
- * - * @note This doesn't take into account the 1-coregroup case, where all - * compute atoms would effectively be able to run on 'all cores', but - * contexts will still not always get marked with this attribute. Instead, - * it is the caller's responsibility to take into account the number of - * coregroups when interpreting this attribute. - * - * @note Whilst Tiler atoms are normally combined with - * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without - * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy - * enough to handle anyway. - */ - KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, - - /** Must be the last in the enum */ - KBASEP_JS_CTX_ATTR_COUNT -}; - -enum { - /** Bit indicating that new atom should be started because this atom completed */ - KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), - /** Bit indicating that the atom was evicted from the JS_NEXT registers */ - KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) -}; - -/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ -typedef u32 kbasep_js_atom_done_code; - -/* - * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode - */ -enum { - /* - * In this mode, higher priority atoms will be scheduled first, - * regardless of the context they belong to. Newly-runnable higher - * priority atoms can preempt lower priority atoms currently running on - * the GPU, even if they belong to a different context. - */ - KBASE_JS_SYSTEM_PRIORITY_MODE = 0, - - /* - * In this mode, the highest-priority atom will be chosen from each - * context in turn using a round-robin algorithm, so priority only has - * an effect within the context an atom belongs to. Newly-runnable - * higher priority atoms can preempt the lower priority atoms currently - * running on the GPU, but only if they belong to the same context. - */ - KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, - - /* Must be the last in the enum */ - KBASE_JS_PRIORITY_MODE_COUNT, -}; - -/* - * Internal atom priority defines for kbase_jd_atom::sched_prio - */ -enum { - KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, - KBASE_JS_ATOM_SCHED_PRIO_MED, - KBASE_JS_ATOM_SCHED_PRIO_LOW, - KBASE_JS_ATOM_SCHED_PRIO_COUNT, -}; - -/* Invalid priority for kbase_jd_atom::sched_prio */ -#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 - -/* Default priority in the case of contexts with no atoms, or being lenient - * about invalid priorities from userspace. - */ -#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED - -/** - * @brief KBase Device Data Job Scheduler sub-structure - * - * This encapsulates the current context of the Job Scheduler on a particular - * device. This context is global to the device, and is not tied to any - * particular struct kbase_context running on the device. - * - * nr_contexts_running and as_free are optimized for packing together (by making - * them smaller types than u32). The operations on them should rarely involve - * masking. The use of signed types for arithmetic indicates to the compiler that - * the value will not rollover (which would be undefined behavior), and so under - * the Total License model, it is free to make optimizations based on that (i.e. - * to remove masking). - */ -struct kbasep_js_device_data { - /* Sub-structure to collect together Job Scheduling data used in IRQ - * context. The hwaccess_lock must be held when accessing. */ - struct runpool_irq { - /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. 
- * When bit 'N' is set in this, it indicates whether the context bound to address space - * 'N' is allowed to submit jobs. - */ - u16 submit_allowed; - - /** Context Attributes: - * Each is large enough to hold a refcount of the number of contexts - * that can fit into the runpool. This is currently BASE_MAX_NR_AS - * - * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store - * the refcount. Hence, it's not worthwhile reducing this to - * bit-manipulation on u32s to save space (where in contrast, 4 bit - * sub-fields would be easy to do and would save space). - * - * Whilst this must not become negative, the sign bit is used for: - * - error detection in debug builds - * - Optimization: it is undefined for a signed int to overflow, and so - * the compiler can optimize for that never happening (thus, no masking - * is required on updating the variable) */ - s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; - - /* - * Affinity management and tracking - */ - /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates - * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ - u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; - /** Refcount for each core owned by each slot. Used to generate the - * slot_affinities array of bitvectors - * - * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, - * because it is refcounted only when a job is definitely about to be - * submitted to a slot, and is de-refcounted immediately after a job - * finishes */ - s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; - } runpool_irq; - - /** - * Run Pool mutex, for managing contexts within the runpool. - * Unless otherwise specified, you must hold this lock whilst accessing any - * members that follow - * - * In addition, this is used to access: - * - the kbasep_js_kctx_info::runpool substructure - */ - struct mutex runpool_mutex; - - /** - * Queue Lock, used to access the Policy's queue of contexts independently - * of the Run Pool. - * - * Of course, you don't need the Run Pool lock to access this. - */ - struct mutex queue_mutex; - - /** - * Scheduling semaphore. This must be held when calling - * kbase_jm_kick() - */ - struct semaphore schedule_sem; - - /** - * List of contexts that can currently be pulled from - */ - struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - /** - * List of contexts that can not currently be pulled from, but have - * jobs currently running. 
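/*
 * Illustrative sketch only (not part of the removed driver sources): the
 * bit layout of the submit_allowed bitvector documented earlier in this
 * structure, i.e. bit N set means the context bound to address space N may
 * submit jobs. The helper name is hypothetical; as for the rest of
 * runpool_irq, the hwaccess_lock must be held by the caller.
 */
static inline bool submit_allowed_for_as(u16 submit_allowed,
					 unsigned int as_nr)
{
	return (submit_allowed & (1u << as_nr)) != 0;
}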
- */ - struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ - s8 nr_user_contexts_running; - /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ - s8 nr_all_contexts_running; - - /** Core Requirements to match up with base_js_atom's core_req memeber - * @note This is a write-once member, and so no locking is required to read */ - base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; - - u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */ - u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */ - u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */ - u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */ - u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */ - u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */ - u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */ - u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */ - u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ - u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ - - /**< Value for JS_SOFT_JOB_TIMEOUT */ - atomic_t soft_job_timeout_ms; - - /** List of suspended soft jobs */ - struct list_head suspended_soft_jobs_list; - -#ifdef CONFIG_MALI_DEBUG - /* Support soft-stop on a single context */ - bool softstop_always; -#endif /* CONFIG_MALI_DEBUG */ - - /** The initalized-flag is placed at the end, to avoid cache-pollution (we should - * only be using this during init/term paths). - * @note This is a write-once member, and so no locking is required to read */ - int init_status; - - /* Number of contexts that can currently be pulled from */ - u32 nr_contexts_pullable; - - /* Number of contexts that can either be pulled from or are currently - * running */ - atomic_t nr_contexts_runnable; -}; - -/** - * @brief KBase Context Job Scheduling information structure - * - * This is a substructure in the struct kbase_context that encapsulates all the - * scheduling information. - */ -struct kbasep_js_kctx_info { - - /** - * Job Scheduler Context information sub-structure. These members are - * accessed regardless of whether the context is: - * - In the Policy's Run Pool - * - In the Policy's Queue - * - Not queued nor in the Run Pool. - * - * You must obtain the jsctx_mutex before accessing any other members of - * this substructure. - * - * You may not access any of these members from IRQ context. - */ - struct kbase_jsctx { - struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ - - /** Number of jobs ready to run - does \em not include the jobs waiting in - * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr - * for such jobs*/ - u32 nr_jobs; - - /** Context Attributes: - * Each is large enough to hold a refcount of the number of atoms on - * the context. **/ - u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; - - /** - * Wait queue to wait for KCTX_SHEDULED flag state changes. - * */ - wait_queue_head_t is_scheduled_wait; - - /** Link implementing JS queues. Context can be present on one - * list per job slot - */ - struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; - } ctx; - - /* The initalized-flag is placed at the end, to avoid cache-pollution (we should - * only be using this during init/term paths) */ - int init_status; -}; - -/** Subset of atom state that can be available after jd_done_nolock() is called - * on that atom. 
A copy must be taken via kbasep_js_atom_retained_state_copy(), - * because the original atom could disappear. */ -struct kbasep_js_atom_retained_state { - /** Event code - to determine whether the atom has finished */ - enum base_jd_event_code event_code; - /** core requirements */ - base_jd_core_req core_req; - /* priority */ - int sched_priority; - /* Core group atom was executed on */ - u32 device_nr; - -}; - -/** - * Value signifying 'no retry on a slot required' for: - * - kbase_js_atom_retained_state::retry_submit_on_slot - * - kbase_jd_atom::retry_submit_on_slot - */ -#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) - -/** - * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. - * - * @see kbase_atom_retained_state_is_valid() - */ -#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP - -/** - * @brief The JS timer resolution, in microseconds - * - * Any non-zero difference in time will be at least this size. - */ -#define KBASEP_JS_TICK_RESOLUTION_US 1 - - - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_linux.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_linux.h deleted file mode 100755 index 003ac9e68a76..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_linux.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_linux.h - * Base kernel APIs, Linux implementation. - */ - -#ifndef _KBASE_LINUX_H_ -#define _KBASE_LINUX_H_ - -/* All things that are needed for the Linux port. */ -#include -#include -#include -#include -#include - -#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) - #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) -#else - #define KBASE_EXPORT_TEST_API(func) -#endif - -#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) - -#endif /* _KBASE_LINUX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem.c deleted file mode 100755 index 3d0de90346de..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem.c +++ /dev/null @@ -1,3819 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_mem.c - * Base kernel memory APIs - */ -#ifdef CONFIG_DMA_SHARED_BUFFER -#include -#endif /* CONFIG_DMA_SHARED_BUFFER */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* Forward declarations */ -static void free_partial_locked(struct kbase_context *kctx, - struct kbase_mem_pool *pool, struct tagged_addr tp); - -static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) -{ -#if defined(CONFIG_ARM64) - /* VA_BITS can be as high as 48 bits, but all bits are available for - * both user and kernel. - */ - size_t cpu_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - /* x86_64 can access 48 bits of VA, but the 48th is used to denote - * kernel (1) vs userspace (0), so the max here is 47. - */ - size_t cpu_va_bits = 47; -#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) - size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; -#else -#error "Unknown CPU VA width for this architecture" -#endif - -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - cpu_va_bits = 32; -#endif - - return cpu_va_bits; -} - -/* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to */ -static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, - u64 gpu_pfn) -{ - struct rb_root *rbtree = NULL; - - /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA - * zone if this has been initialized. - */ - if (gpu_pfn >= kctx->exec_va_start) - rbtree = &kctx->reg_rbtree_exec; - else { - u64 same_va_end; - -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) -#endif /* CONFIG_64BIT */ - same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; -#ifdef CONFIG_64BIT - else - same_va_end = kctx->same_va_end; -#endif /* CONFIG_64BIT */ - - if (gpu_pfn >= same_va_end) - rbtree = &kctx->reg_rbtree_custom; - else - rbtree = &kctx->reg_rbtree_same; - } - - return rbtree; -} - -/* This function inserts a region into the tree. */ -static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) -{ - u64 start_pfn = new_reg->start_pfn; - struct rb_node **link = NULL; - struct rb_node *parent = NULL; - struct rb_root *rbtree = NULL; - - rbtree = new_reg->rbtree; - - link = &(rbtree->rb_node); - /* Find the right place in the tree using tree search */ - while (*link) { - struct kbase_va_region *old_reg; - - parent = *link; - old_reg = rb_entry(parent, struct kbase_va_region, rblink); - - /* RBTree requires no duplicate entries. 
*/ - KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); - - if (old_reg->start_pfn > start_pfn) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - /* Put the new node there, and rebalance tree */ - rb_link_node(&(new_reg->rblink), parent, link); - - rb_insert_color(&(new_reg->rblink), rbtree); -} - -static struct kbase_va_region *find_region_enclosing_range_rbtree( - struct rb_root *rbtree, u64 start_pfn, size_t nr_pages) -{ - struct rb_node *rbnode; - struct kbase_va_region *reg; - u64 end_pfn = start_pfn + nr_pages; - - rbnode = rbtree->rb_node; - - while (rbnode) { - u64 tmp_start_pfn, tmp_end_pfn; - - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - tmp_start_pfn = reg->start_pfn; - tmp_end_pfn = reg->start_pfn + reg->nr_pages; - - /* If start is lower than this, go left. */ - if (start_pfn < tmp_start_pfn) - rbnode = rbnode->rb_left; - /* If end is higher than this, then go right. */ - else if (end_pfn > tmp_end_pfn) - rbnode = rbnode->rb_right; - else /* Enclosing */ - return reg; - } - - return NULL; -} - -struct kbase_va_region *kbase_find_region_enclosing_address( - struct rb_root *rbtree, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_node *rbnode; - struct kbase_va_region *reg; - - rbnode = rbtree->rb_node; - - while (rbnode) { - u64 tmp_start_pfn, tmp_end_pfn; - - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - tmp_start_pfn = reg->start_pfn; - tmp_end_pfn = reg->start_pfn + reg->nr_pages; - - /* If start is lower than this, go left. */ - if (gpu_pfn < tmp_start_pfn) - rbnode = rbnode->rb_left; - /* If end is higher than this, then go right. */ - else if (gpu_pfn >= tmp_end_pfn) - rbnode = rbnode->rb_right; - else /* Enclosing */ - return reg; - } - - return NULL; -} - -/* Find region enclosing given address. 
*/ -struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( - struct kbase_context *kctx, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_root *rbtree = NULL; - - KBASE_DEBUG_ASSERT(NULL != kctx); - - lockdep_assert_held(&kctx->reg_lock); - - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); - - return kbase_find_region_enclosing_address(rbtree, gpu_addr); -} - -KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); - -struct kbase_va_region *kbase_find_region_base_address( - struct rb_root *rbtree, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_node *rbnode = NULL; - struct kbase_va_region *reg = NULL; - - rbnode = rbtree->rb_node; - - while (rbnode) { - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - if (reg->start_pfn > gpu_pfn) - rbnode = rbnode->rb_left; - else if (reg->start_pfn < gpu_pfn) - rbnode = rbnode->rb_right; - else - return reg; - } - - return NULL; -} - -/* Find region with given base address */ -struct kbase_va_region *kbase_region_tracker_find_region_base_address( - struct kbase_context *kctx, u64 gpu_addr) -{ - u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; - struct rb_root *rbtree = NULL; - - lockdep_assert_held(&kctx->reg_lock); - - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); - - return kbase_find_region_base_address(rbtree, gpu_addr); -} - -KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); - -/* Find region meeting given requirements */ -static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( - struct kbase_va_region *reg_reqs, - size_t nr_pages, size_t align_offset, size_t align_mask, - u64 *out_start_pfn) -{ - struct rb_node *rbnode = NULL; - struct kbase_va_region *reg = NULL; - struct rb_root *rbtree = NULL; - - /* Note that this search is a linear search, as we do not have a target - address in mind, so does not benefit from the rbtree search */ - rbtree = reg_reqs->rbtree; - - for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - if ((reg->nr_pages >= nr_pages) && - (reg->flags & KBASE_REG_FREE)) { - /* Check alignment */ - u64 start_pfn = reg->start_pfn; - - /* When align_offset == align, this sequence is - * equivalent to: - * (start_pfn + align_mask) & ~(align_mask) - * - * Otherwise, it aligns to n*align + offset, for the - * lowest value n that makes this still >start_pfn */ - start_pfn += align_mask; - start_pfn -= (start_pfn - align_offset) & (align_mask); - - if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { - /* Can't end at 4GB boundary */ - if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) - start_pfn += align_offset; - - /* Can't start at 4GB boundary */ - if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) - start_pfn += align_offset; - - if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) || - !(start_pfn & BASE_MEM_PFN_MASK_4GB)) - continue; - } else if (reg_reqs->flags & - KBASE_REG_GPU_VA_SAME_4GB_PAGE) { - u64 end_pfn = start_pfn + nr_pages - 1; - - if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != - (end_pfn & ~BASE_MEM_PFN_MASK_4GB)) - start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; - } - - if ((start_pfn >= reg->start_pfn) && - (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && - ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { - *out_start_pfn = start_pfn; - return reg; - } - } - } - - return NULL; -} - -/** - * @brief Remove a region object from the global list. 
- * - * The region reg is removed, possibly by merging with other free and - * compatible adjacent regions. It must be called with the context - * region lock held. The associated memory is not released (see - * kbase_free_alloced_region). Internal use only. - */ -int kbase_remove_va_region(struct kbase_va_region *reg) -{ - struct rb_node *rbprev; - struct kbase_va_region *prev = NULL; - struct rb_node *rbnext; - struct kbase_va_region *next = NULL; - struct rb_root *reg_rbtree = NULL; - - int merged_front = 0; - int merged_back = 0; - int err = 0; - - reg_rbtree = reg->rbtree; - - /* Try to merge with the previous block first */ - rbprev = rb_prev(&(reg->rblink)); - if (rbprev) { - prev = rb_entry(rbprev, struct kbase_va_region, rblink); - if (prev->flags & KBASE_REG_FREE) { - /* We're compatible with the previous VMA, - * merge with it */ - WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); - prev->nr_pages += reg->nr_pages; - rb_erase(&(reg->rblink), reg_rbtree); - reg = prev; - merged_front = 1; - } - } - - /* Try to merge with the next block second */ - /* Note we do the lookup here as the tree may have been rebalanced. */ - rbnext = rb_next(&(reg->rblink)); - if (rbnext) { - /* We're compatible with the next VMA, merge with it */ - next = rb_entry(rbnext, struct kbase_va_region, rblink); - if (next->flags & KBASE_REG_FREE) { - WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); - next->start_pfn = reg->start_pfn; - next->nr_pages += reg->nr_pages; - rb_erase(&(reg->rblink), reg_rbtree); - merged_back = 1; - if (merged_front) { - /* We already merged with prev, free it */ - kbase_free_alloced_region(reg); - } - } - } - - /* If we failed to merge then we need to add a new block */ - if (!(merged_front || merged_back)) { - /* - * We didn't merge anything. Add a new free - * placeholder and remove the original one. - */ - struct kbase_va_region *free_reg; - - free_reg = kbase_alloc_free_region(reg_rbtree, - reg->start_pfn, reg->nr_pages, - reg->flags & KBASE_REG_ZONE_MASK); - if (!free_reg) { - err = -ENOMEM; - goto out; - } - rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); - } - - out: - return err; -} - -KBASE_EXPORT_TEST_API(kbase_remove_va_region); - -/** - * kbase_insert_va_region_nolock - Insert a VA region to the list, - * replacing the existing one. - * - * @new_reg: The new region to insert - * @at_reg: The region to replace - * @start_pfn: The Page Frame Number to insert at - * @nr_pages: The number of pages of the region - */ -static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, - struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) -{ - struct rb_root *reg_rbtree = NULL; - int err = 0; - - reg_rbtree = at_reg->rbtree; - - /* Must be a free region */ - KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); - /* start_pfn should be contained within at_reg */ - KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); - /* at least nr_pages from start_pfn should be contained within at_reg */ - KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); - - new_reg->start_pfn = start_pfn; - new_reg->nr_pages = nr_pages; - - /* Regions are a whole use, so swap and delete old one. 
*/ - if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { - rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), - reg_rbtree); - kbase_free_alloced_region(at_reg); - } - /* New region replaces the start of the old one, so insert before. */ - else if (at_reg->start_pfn == start_pfn) { - at_reg->start_pfn += nr_pages; - KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); - at_reg->nr_pages -= nr_pages; - - kbase_region_tracker_insert(new_reg); - } - /* New region replaces the end of the old one, so insert after. */ - else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { - at_reg->nr_pages -= nr_pages; - - kbase_region_tracker_insert(new_reg); - } - /* New region splits the old one, so insert and create new */ - else { - struct kbase_va_region *new_front_reg; - - new_front_reg = kbase_alloc_free_region(reg_rbtree, - at_reg->start_pfn, - start_pfn - at_reg->start_pfn, - at_reg->flags & KBASE_REG_ZONE_MASK); - - if (new_front_reg) { - at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; - at_reg->start_pfn = start_pfn + nr_pages; - - kbase_region_tracker_insert(new_front_reg); - kbase_region_tracker_insert(new_reg); - } else { - err = -ENOMEM; - } - } - - return err; -} - -/** - * kbase_add_va_region - Add a VA region to the region list for a context. - * - * @kctx: kbase context containing the region - * @reg: the region to add - * @addr: the address to insert the region at - * @nr_pages: the number of pages in the region - * @align: the minimum alignment in pages - */ -int kbase_add_va_region(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 addr, - size_t nr_pages, size_t align) -{ - int err = 0; - struct kbase_device *kbdev = kctx->kbdev; - int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); - int gpu_pc_bits = - kbdev->gpu_props.props.core_props.log2_program_counter_size; - - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); - - lockdep_assert_held(&kctx->reg_lock); - - /* The executable allocation from the SAME_VA zone would already have an - * appropriately aligned GPU VA chosen for it. - */ - if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) { - if (cpu_va_bits > gpu_pc_bits) { - align = max(align, (size_t)((1ULL << gpu_pc_bits) - >> PAGE_SHIFT)); - } - } - - do { - err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, - align); - if (err != -ENOMEM) - break; - - /* - * If the allocation is not from the same zone as JIT - * then don't retry, we're out of VA and there is - * nothing which can be done about it. - */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) - break; - } while (kbase_jit_evict(kctx)); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_add_va_region); - -/** - * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree - * - * Insert a region into the rbtree that was specified when the region was - * created. If addr is 0 a free area in the rbtree is used, otherwise the - * specified address is used. 
- * - * @kbdev: The kbase device - * @reg: The region to add - * @addr: The address to add the region at, or 0 to map at any available address - * @nr_pages: The size of the region in pages - * @align: The minimum alignment in pages - */ -int kbase_add_va_region_rbtree(struct kbase_device *kbdev, - struct kbase_va_region *reg, - u64 addr, size_t nr_pages, size_t align) -{ - struct rb_root *rbtree = NULL; - struct kbase_va_region *tmp; - u64 gpu_pfn = addr >> PAGE_SHIFT; - int err = 0; - - rbtree = reg->rbtree; - - if (!align) - align = 1; - - /* must be a power of 2 */ - KBASE_DEBUG_ASSERT(is_power_of_2(align)); - KBASE_DEBUG_ASSERT(nr_pages > 0); - - /* Path 1: Map a specific address. Find the enclosing region, - * which *must* be free. - */ - if (gpu_pfn) { - struct device *dev = kbdev->dev; - - KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); - - tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, - nr_pages); - if (!tmp) { - dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); - err = -ENOMEM; - goto exit; - } - if (!(tmp->flags & KBASE_REG_FREE)) { - dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", - tmp->start_pfn, tmp->flags, - tmp->nr_pages, gpu_pfn, nr_pages); - err = -ENOMEM; - goto exit; - } - - err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn, - nr_pages); - if (err) { - dev_warn(dev, "Failed to insert va region"); - err = -ENOMEM; - } - } else { - /* Path 2: Map any free address which meets the requirements. */ - u64 start_pfn; - size_t align_offset = align; - size_t align_mask = align - 1; - - if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { - WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", - __func__, - (unsigned long)align); - align_mask = reg->extent - 1; - align_offset = reg->extent - reg->initial_commit; - } - - tmp = kbase_region_tracker_find_region_meeting_reqs(reg, - nr_pages, align_offset, align_mask, - &start_pfn); - if (tmp) { - err = kbase_insert_va_region_nolock(reg, tmp, - start_pfn, nr_pages); - } else { - err = -ENOMEM; - } - } - -exit: - return err; -} - -/** - * @brief Initialize the internal region tracker data structure. - */ -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg) -{ - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - /* Although custom_va_reg and exec_va_reg don't always exist, - * initialize unconditionally because of the mem_view debugfs - * implementation which relies on them being empty. - * - * The difference between the two is that the EXEC_VA region - * is never initialized at this stage. 
- */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) - kbase_region_tracker_insert(custom_va_reg); -} - -static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) -{ - struct rb_node *rbnode; - struct kbase_va_region *reg; - - do { - rbnode = rb_first(rbtree); - if (rbnode) { - rb_erase(rbnode, rbtree); - reg = rb_entry(rbnode, struct kbase_va_region, rblink); - kbase_free_alloced_region(reg); - } - } while (rbnode); -} - -void kbase_region_tracker_term(struct kbase_context *kctx) -{ - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -} - -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) -{ - kbase_region_tracker_erase_rbtree(rbtree); -} - -static size_t kbase_get_same_va_bits(struct kbase_context *kctx) -{ - return min(kbase_get_num_cpu_va_bits(kctx), - (size_t) kctx->kbdev->gpu_props.mmu.va_bits); -} - -int kbase_region_tracker_init(struct kbase_context *kctx) -{ - struct kbase_va_region *same_va_reg; - struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = kbase_get_same_va_bits(kctx); - u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; - u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; - u64 same_va_pages; - int err; - - /* Take the lock as kbase_free_alloced_region requires it */ - kbase_gpu_vm_lock(kctx); - - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; - /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1, - same_va_pages, - KBASE_REG_ZONE_SAME_VA); - - if (!same_va_reg) { - err = -ENOMEM; - goto fail_unlock; - } - -#ifdef CONFIG_64BIT - /* 32-bit clients have custom VA zones */ - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { -#endif - if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - err = -EINVAL; - goto fail_free_same_va; - } - /* If the current size of TMEM is out of range of the - * virtual address space addressable by the MMU then - * we should shrink it to fit - */ - if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) - custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - - custom_va_reg = kbase_alloc_free_region( - &kctx->reg_rbtree_custom, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); - - if (!custom_va_reg) { - err = -ENOMEM; - goto fail_free_same_va; - } -#ifdef CONFIG_64BIT - } else { - custom_va_size = 0; - } -#endif - - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - - kctx->same_va_end = same_va_pages + 1; - kctx->gpu_va_end = kctx->same_va_end + custom_va_size; - kctx->exec_va_start = U64_MAX; - kctx->jit_va = false; - - - kbase_gpu_vm_unlock(kctx); - return 0; - -fail_free_same_va: - kbase_free_alloced_region(same_va_reg); -fail_unlock: - kbase_gpu_vm_unlock(kctx); - return err; -} - -#ifdef CONFIG_64BIT -static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, - u64 jit_va_pages) -{ - struct kbase_va_region *same_va; - struct kbase_va_region *custom_va_reg; - u64 same_va_bits = kbase_get_same_va_bits(kctx); - u64 total_va_size; - - total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; - - /* First verify that a JIT_VA zone has not been created already. */ - if (kctx->jit_va) - return -EINVAL; - - /* - * Modify the same VA free region after creation. 
Be careful to ensure - * that allocations haven't been made as they could cause an overlap - * to happen with existing same VA allocations and the custom VA zone. - */ - same_va = kbase_region_tracker_find_region_base_address(kctx, - PAGE_SIZE); - if (!same_va) - return -ENOMEM; - - if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) - return -ENOMEM; - - /* It's safe to adjust the same VA zone now */ - same_va->nr_pages -= jit_va_pages; - kctx->same_va_end -= jit_va_pages; - - /* - * Create a custom VA zone at the end of the VA for allocations which - * JIT can use so it doesn't have to allocate VA from the kernel. - */ - custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - kctx->same_va_end, - jit_va_pages, - KBASE_REG_ZONE_CUSTOM_VA); - - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - if (!custom_va_reg) - return -ENOMEM; - - kbase_region_tracker_insert(custom_va_reg); - return 0; -} -#endif - -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level) -{ - int err = 0; - - if (trim_level > 100) - return -EINVAL; - - kbase_gpu_vm_lock(kctx); - -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) - err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); -#endif - /* - * Nothing to do for 32-bit clients, JIT uses the existing - * custom VA zone. - */ - - if (!err) { - kctx->jit_max_allocations = max_allocations; - kctx->trim_level = trim_level; - kctx->jit_va = true; - } - - kbase_gpu_vm_unlock(kctx); - - return err; -} - -int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) -{ - struct kbase_va_region *shrinking_va_reg; - struct kbase_va_region *exec_va_reg; - u64 exec_va_start, exec_va_base_addr; - int err; - - /* The EXEC_VA zone shall be created by making space at the end of the - * address space. Firstly, verify that the number of EXEC_VA pages - * requested by the client is reasonable and then make sure that it is - * not greater than the address space itself before calculating the base - * address of the new zone. - */ - if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) - return -EINVAL; - - kbase_gpu_vm_lock(kctx); - - /* First verify that a JIT_VA zone has not been created already. 
*/ - if (kctx->jit_va) { - err = -EPERM; - goto exit_unlock; - } - - if (exec_va_pages > kctx->gpu_va_end) { - err = -ENOMEM; - goto exit_unlock; - } - - exec_va_start = kctx->gpu_va_end - exec_va_pages; - exec_va_base_addr = exec_va_start << PAGE_SHIFT; - - shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, - exec_va_base_addr); - if (!shrinking_va_reg) { - err = -ENOMEM; - goto exit_unlock; - } - - /* Make sure that the EXEC_VA region is still uninitialized */ - if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_EXEC_VA) { - err = -EPERM; - goto exit_unlock; - } - - if (shrinking_va_reg->nr_pages <= exec_va_pages) { - err = -ENOMEM; - goto exit_unlock; - } - - exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, - exec_va_start, - exec_va_pages, - KBASE_REG_ZONE_EXEC_VA); - if (!exec_va_reg) { - err = -ENOMEM; - goto exit_unlock; - } - - shrinking_va_reg->nr_pages -= exec_va_pages; -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) - kctx->same_va_end -= exec_va_pages; -#endif - kctx->exec_va_start = exec_va_start; - - kbase_region_tracker_insert(exec_va_reg); - err = 0; - -exit_unlock: - kbase_gpu_vm_unlock(kctx); - return err; -} - - -int kbase_mem_init(struct kbase_device *kbdev) -{ - struct kbasep_mem_device *memdev; - int ret; - - KBASE_DEBUG_ASSERT(kbdev); - - memdev = &kbdev->memdev; - kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX; - - /* Initialize memory usage */ - atomic_set(&memdev->used_pages, 0); - - ret = kbase_mem_pool_init(&kbdev->mem_pool, - KBASE_MEM_POOL_MAX_SIZE_KBDEV, - KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, - kbdev, - NULL); - if (ret) - return ret; - - ret = kbase_mem_pool_init(&kbdev->lp_mem_pool, - (KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9), - KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, - kbdev, - NULL); - if (ret) - kbase_mem_pool_term(&kbdev->mem_pool); - - return ret; -} - -void kbase_mem_halt(struct kbase_device *kbdev) -{ - CSTD_UNUSED(kbdev); -} - -void kbase_mem_term(struct kbase_device *kbdev) -{ - struct kbasep_mem_device *memdev; - int pages; - - KBASE_DEBUG_ASSERT(kbdev); - - memdev = &kbdev->memdev; - - pages = atomic_read(&memdev->used_pages); - if (pages != 0) - dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); - - kbase_mem_pool_term(&kbdev->mem_pool); - kbase_mem_pool_term(&kbdev->lp_mem_pool); -} - -KBASE_EXPORT_TEST_API(kbase_mem_term); - - - - -/** - * @brief Allocate a free region object. - * - * The allocated object is not part of any list yet, and is flagged as - * KBASE_REG_FREE. No mapping is allocated yet. - * - * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. 
- * - */ -struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone) -{ - struct kbase_va_region *new_reg; - - KBASE_DEBUG_ASSERT(rbtree != NULL); - - /* zone argument should only contain zone related region flags */ - KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); - KBASE_DEBUG_ASSERT(nr_pages > 0); - /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); - - new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); - - if (!new_reg) - return NULL; - - new_reg->cpu_alloc = NULL; /* no alloc bound yet */ - new_reg->gpu_alloc = NULL; /* no alloc bound yet */ - new_reg->rbtree = rbtree; - new_reg->flags = zone | KBASE_REG_FREE; - - new_reg->flags |= KBASE_REG_GROWABLE; - - new_reg->start_pfn = start_pfn; - new_reg->nr_pages = nr_pages; - - INIT_LIST_HEAD(&new_reg->jit_node); - - return new_reg; -} - -KBASE_EXPORT_TEST_API(kbase_alloc_free_region); - -static struct kbase_context *kbase_reg_flags_to_kctx( - struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_exec); - break; - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - -/** - * @brief Free a region object. - * - * The described region must be freed of any mapping. - * - * If the region is not flagged as KBASE_REG_FREE, the region's - * alloc object will be released. - * It is a bug if no alloc object exists for non-free regions. - * - */ -void kbase_free_alloced_region(struct kbase_va_region *reg) -{ - if (!(reg->flags & KBASE_REG_FREE)) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); - - if (WARN_ON(!kctx)) - return; - - - mutex_lock(&kctx->jit_evict_lock); - - /* - * The physical allocation should have been removed from the - * eviction list before this function is called. However, in the - * case of abnormal process termination or the app leaking the - * memory kbase_mem_free_region is not called so it can still be - * on the list at termination time of the region tracker. - */ - if (!list_empty(®->gpu_alloc->evict_node)) { - mutex_unlock(&kctx->jit_evict_lock); - - /* - * Unlink the physical allocation before unmaking it - * evictable so that the allocation isn't grown back to - * its last backed size as we're going to unmap it - * anyway. - */ - reg->cpu_alloc->reg = NULL; - if (reg->cpu_alloc != reg->gpu_alloc) - reg->gpu_alloc->reg = NULL; - - /* - * If a region has been made evictable then we must - * unmake it before trying to free it. - * If the memory hasn't been reclaimed it will be - * unmapped and freed below, if it has been reclaimed - * then the operations below are no-ops. - */ - if (reg->flags & KBASE_REG_DONT_NEED) { - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == - KBASE_MEM_TYPE_NATIVE); - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - } else { - mutex_unlock(&kctx->jit_evict_lock); - } - - /* - * Remove the region from the sticky resource metadata - * list should it be there. 
- */ - kbase_sticky_resource_release(kctx, NULL, - reg->start_pfn << PAGE_SHIFT); - - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); - /* To detect use-after-free in debug builds */ - KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); - } - kfree(reg); -} - -KBASE_EXPORT_TEST_API(kbase_free_alloced_region); - -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) -{ - int err; - size_t i = 0; - unsigned long attr; - unsigned long mask = ~KBASE_REG_MEMATTR_MASK; - unsigned long gwt_mask = ~0; - -#ifdef CONFIG_MALI_CINSTR_GWT - if (kctx->gwt_enabled) - gwt_mask = ~KBASE_REG_GPU_WR; -#endif - - if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && - (reg->flags & KBASE_REG_SHARE_BOTH)) - attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); - else - attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); - - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); - - err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); - if (err) - return err; - - if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { - u64 stride; - struct kbase_mem_phy_alloc *alloc; - - alloc = reg->gpu_alloc; - stride = alloc->imported.alias.stride; - KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); - for (i = 0; i < alloc->imported.alias.nents; i++) { - if (alloc->imported.alias.aliased[i].alloc) { - err = kbase_mmu_insert_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn + (i * stride), - alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, - alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask, - kctx->as_nr); - if (err) - goto bad_insert; - - kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); - } else { - err = kbase_mmu_insert_single_page(kctx, - reg->start_pfn + i * stride, - kctx->aliasing_sink_page, - alloc->imported.alias.aliased[i].length, - (reg->flags & mask & gwt_mask) | attr); - - if (err) - goto bad_insert; - } - } - } else { - err = kbase_mmu_insert_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, - kctx->as_nr); - if (err) - goto bad_insert; - kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc); - } - - return err; - -bad_insert: - if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { - u64 stride; - - stride = reg->gpu_alloc->imported.alias.stride; - KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); - while (i--) - if (reg->gpu_alloc->imported.alias.aliased[i].alloc) { - kbase_mmu_teardown_pages(kctx->kbdev, - &kctx->mmu, - reg->start_pfn + (i * stride), - reg->gpu_alloc->imported.alias.aliased[i].length, - kctx->as_nr); - kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); - } - } - - kbase_remove_va_region(reg); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_gpu_mmap); - -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable); - -int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - int err; - - if (reg->start_pfn == 0) - return 0; - - if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { - size_t i; - - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, reg->nr_pages, kctx->as_nr); - KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); - for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) - if 
(reg->gpu_alloc->imported.alias.aliased[i].alloc) - kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); - } else { - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, kbase_reg_current_backed_size(reg), - kctx->as_nr); - kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); - } - - if (reg->gpu_alloc && reg->gpu_alloc->type == - KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - struct kbase_alloc_import_user_buf *user_buf = - ®->gpu_alloc->imported.user_buf; - - if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { - user_buf->current_mapping_usage_count &= - ~PINNED_ON_IMPORT; - - kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, - (reg->flags & KBASE_REG_GPU_WR)); - } - } - - if (err) - return err; - - err = kbase_remove_va_region(reg); - return err; -} - -static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( - struct kbase_context *kctx, - unsigned long uaddr, size_t size, u64 *offset) -{ - struct vm_area_struct *vma; - struct kbase_cpu_mapping *map; - unsigned long vm_pgoff_in_region; - unsigned long vm_off_in_region; - unsigned long map_start; - size_t map_size; - - lockdep_assert_held(¤t->mm->mmap_sem); - - if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ - return NULL; - - vma = find_vma_intersection(current->mm, uaddr, uaddr+size); - - if (!vma || vma->vm_start > uaddr) - return NULL; - if (vma->vm_ops != &kbase_vm_ops) - /* Not ours! */ - return NULL; - - map = vma->vm_private_data; - - if (map->kctx != kctx) - /* Not from this context! */ - return NULL; - - vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; - vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; - map_start = vma->vm_start - vm_off_in_region; - map_size = map->region->nr_pages << PAGE_SHIFT; - - if ((uaddr + size) > (map_start + map_size)) - /* Not within the CPU mapping */ - return NULL; - - *offset = (uaddr - vma->vm_start) + vm_off_in_region; - - return map; -} - -int kbasep_find_enclosing_cpu_mapping_offset( - struct kbase_context *kctx, - unsigned long uaddr, size_t size, u64 *offset) -{ - struct kbase_cpu_mapping *map; - - kbase_os_mem_map_lock(kctx); - - map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); - - kbase_os_mem_map_unlock(kctx); - - if (!map) - return -EINVAL; - - return 0; -} - -KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); - -int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, - u64 gpu_addr, size_t size, u64 *start, u64 *offset) -{ - struct kbase_va_region *region; - - kbase_gpu_vm_lock(kctx); - - region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); - - if (!region) { - kbase_gpu_vm_unlock(kctx); - return -EINVAL; - } - - *start = region->start_pfn << PAGE_SHIFT; - - *offset = gpu_addr - *start; - - if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) { - kbase_gpu_vm_unlock(kctx); - return -EINVAL; - } - - kbase_gpu_vm_unlock(kctx); - - return 0; -} - -KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset); - -void kbase_sync_single(struct kbase_context *kctx, - struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa, - off_t offset, size_t size, enum kbase_sync_type sync_fn) -{ - struct page *cpu_page; - phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa); - phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa); - - cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); - - if (likely(cpu_pa == gpu_pa)) { - dma_addr_t dma_addr; - - BUG_ON(!cpu_page); - BUG_ON(offset + size > PAGE_SIZE); - - 
dma_addr = kbase_dma_addr(cpu_page) + offset; - if (sync_fn == KBASE_SYNC_TO_CPU) - dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, - size, DMA_BIDIRECTIONAL); - else if (sync_fn == KBASE_SYNC_TO_DEVICE) - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, - size, DMA_BIDIRECTIONAL); - } else { - void *src = NULL; - void *dst = NULL; - struct page *gpu_page; - - if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) - return; - - gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); - - if (sync_fn == KBASE_SYNC_TO_DEVICE) { - src = ((unsigned char *)kmap(cpu_page)) + offset; - dst = ((unsigned char *)kmap(gpu_page)) + offset; - } else if (sync_fn == KBASE_SYNC_TO_CPU) { - dma_sync_single_for_cpu(kctx->kbdev->dev, - kbase_dma_addr(gpu_page) + offset, - size, DMA_BIDIRECTIONAL); - src = ((unsigned char *)kmap(gpu_page)) + offset; - dst = ((unsigned char *)kmap(cpu_page)) + offset; - } - memcpy(dst, src, size); - kunmap(gpu_page); - kunmap(cpu_page); - if (sync_fn == KBASE_SYNC_TO_DEVICE) - dma_sync_single_for_device(kctx->kbdev->dev, - kbase_dma_addr(gpu_page) + offset, - size, DMA_BIDIRECTIONAL); - } -} - -static int kbase_do_syncset(struct kbase_context *kctx, - struct basep_syncset *sset, enum kbase_sync_type sync_fn) -{ - int err = 0; - struct kbase_va_region *reg; - struct kbase_cpu_mapping *map; - unsigned long start; - size_t size; - struct tagged_addr *cpu_pa; - struct tagged_addr *gpu_pa; - u64 page_off, page_count; - u64 i; - u64 offset; - - kbase_os_mem_map_lock(kctx); - kbase_gpu_vm_lock(kctx); - - /* find the region where the virtual address is contained */ - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - sset->mem_handle.basep.handle); - if (!reg) { - dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", - sset->mem_handle.basep.handle); - err = -EINVAL; - goto out_unlock; - } - - if (!(reg->flags & KBASE_REG_CPU_CACHED) || - kbase_mem_is_imported(reg->gpu_alloc->type)) - goto out_unlock; - - start = (uintptr_t)sset->user_addr; - size = (size_t)sset->size; - - map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); - if (!map) { - dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle.basep.handle); - err = -EINVAL; - goto out_unlock; - } - - page_off = offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - cpu_pa = kbase_get_cpu_phy_pages(reg); - gpu_pa = kbase_get_gpu_phy_pages(reg); - - if (page_off > reg->nr_pages || - page_off + page_count > reg->nr_pages) { - /* Sync overflows the region */ - err = -EINVAL; - goto out_unlock; - } - - /* Sync first page */ - if (as_phys_addr_t(cpu_pa[page_off])) { - size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); - - kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], - offset, sz, sync_fn); - } - - /* Sync middle pages (if any) */ - for (i = 1; page_count > 2 && i < page_count - 1; i++) { - /* we grow upwards, so bail on first non-present page */ - if (!as_phys_addr_t(cpu_pa[page_off + i])) - break; - - kbase_sync_single(kctx, cpu_pa[page_off + i], - gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); - } - - /* Sync last page (if any) */ - if (page_count > 1 && - as_phys_addr_t(cpu_pa[page_off + page_count - 1])) { - size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; - - kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], - gpu_pa[page_off + page_count - 1], 0, sz, - sync_fn); - } - -out_unlock: - kbase_gpu_vm_unlock(kctx); - kbase_os_mem_map_unlock(kctx); - return err; -} - 
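/*
 * Illustrative sketch only (not part of the removed driver sources): the
 * page split performed by kbase_do_syncset() above, reduced to plain
 * arithmetic. Given a CPU range [start, start + size), it reports how many
 * pages are touched and the partial sizes synced on the first and last
 * page; the middle pages are synced whole. The struct and function names
 * are hypothetical.
 */
#include <stddef.h>

struct sync_split {
	size_t page_count;   /* number of pages the range touches */
	size_t first_offset; /* offset of the range within its first page */
	size_t first_size;   /* bytes synced in the first page */
	size_t last_size;    /* bytes synced in the last page (page_count > 1) */
};

static struct sync_split split_sync_range(unsigned long start, size_t size,
					  size_t page_size)
{
	struct sync_split s;

	s.first_offset = start & (page_size - 1);
	s.page_count = (size + s.first_offset + page_size - 1) / page_size;
	s.first_size = size < page_size - s.first_offset ?
			size : page_size - s.first_offset;
	s.last_size = ((start + size - 1) & (page_size - 1)) + 1;
	return s;
}

/*
 * Example: start = 0x1800, size = 0x1000, page_size = 0x1000 yields
 * page_count = 2 with first_size = last_size = 0x800 bytes.
 */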
-int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) -{ - int err = -EINVAL; - - KBASE_DEBUG_ASSERT(kctx != NULL); - KBASE_DEBUG_ASSERT(sset != NULL); - - if (sset->mem_handle.basep.handle & ~PAGE_MASK) { - dev_warn(kctx->kbdev->dev, - "mem_handle: passed parameter is invalid"); - return -EINVAL; - } - - switch (sset->type) { - case BASE_SYNCSET_OP_MSYNC: - err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); - break; - - case BASE_SYNCSET_OP_CSYNC: - err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); - break; - - default: - dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); - break; - } - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_sync_now); - -/* vm lock must be held */ -int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - int err; - - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); - lockdep_assert_held(&kctx->reg_lock); - - if (reg->flags & KBASE_REG_JIT) { - dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n"); - return -EINVAL; - } - - /* - * Unlink the physical allocation before unmaking it evictable so - * that the allocation isn't grown back to its last backed size - * as we're going to unmap it anyway. - */ - reg->cpu_alloc->reg = NULL; - if (reg->cpu_alloc != reg->gpu_alloc) - reg->gpu_alloc->reg = NULL; - - /* - * If a region has been made evictable then we must unmake it - * before trying to free it. - * If the memory hasn't been reclaimed it will be unmapped and freed - * below, if it has been reclaimed then the operations below are no-ops. - */ - if (reg->flags & KBASE_REG_DONT_NEED) { - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == - KBASE_MEM_TYPE_NATIVE); - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - - err = kbase_gpu_munmap(kctx, reg); - if (err) { - dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n"); - goto out; - } - - /* This will also free the physical pages */ - kbase_free_alloced_region(reg); - - out: - return err; -} - -KBASE_EXPORT_TEST_API(kbase_mem_free_region); - -/** - * @brief Free the region from the GPU and unregister it. - * - * This function implements the free operation on a memory segment. - * It will loudly fail if called with outstanding mappings. 
- */ -int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) -{ - int err = 0; - struct kbase_va_region *reg; - - KBASE_DEBUG_ASSERT(kctx != NULL); - - if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { - dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); - return -EINVAL; - } - - if (0 == gpu_addr) { - dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); - return -EINVAL; - } - kbase_gpu_vm_lock(kctx); - - if (gpu_addr >= BASE_MEM_COOKIE_BASE && - gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { - int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); - - reg = kctx->pending_regions[cookie]; - if (!reg) { - err = -EINVAL; - goto out_unlock; - } - - /* ask to unlink the cookie as we'll free it */ - - kctx->pending_regions[cookie] = NULL; - kctx->cookies |= (1UL << cookie); - - kbase_free_alloced_region(reg); - } else { - /* A real GPU va */ - /* Validate the region */ - reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) { - dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", - gpu_addr); - err = -EINVAL; - goto out_unlock; - } - - if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { - /* SAME_VA must be freed through munmap */ - dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, - gpu_addr); - err = -EINVAL; - goto out_unlock; - } - err = kbase_mem_free_region(kctx, reg); - } - - out_unlock: - kbase_gpu_vm_unlock(kctx); - return err; -} - -KBASE_EXPORT_TEST_API(kbase_mem_free); - -int kbase_update_region_flags(struct kbase_context *kctx, - struct kbase_va_region *reg, unsigned long flags) -{ - KBASE_DEBUG_ASSERT(NULL != reg); - KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); - - reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); - /* all memory is now growable */ - reg->flags |= KBASE_REG_GROWABLE; - - if (flags & BASE_MEM_GROW_ON_GPF) - reg->flags |= KBASE_REG_PF_GROW; - - if (flags & BASE_MEM_PROT_CPU_WR) - reg->flags |= KBASE_REG_CPU_WR; - - if (flags & BASE_MEM_PROT_CPU_RD) - reg->flags |= KBASE_REG_CPU_RD; - - if (flags & BASE_MEM_PROT_GPU_WR) - reg->flags |= KBASE_REG_GPU_WR; - - if (flags & BASE_MEM_PROT_GPU_RD) - reg->flags |= KBASE_REG_GPU_RD; - - if (0 == (flags & BASE_MEM_PROT_GPU_EX)) - reg->flags |= KBASE_REG_GPU_NX; - - if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { - if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && - !(flags & BASE_MEM_UNCACHED_GPU)) - return -EINVAL; - } else if (flags & (BASE_MEM_COHERENT_SYSTEM | - BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { - reg->flags |= KBASE_REG_SHARE_BOTH; - } - - if (!(reg->flags & KBASE_REG_SHARE_BOTH) && - flags & BASE_MEM_COHERENT_LOCAL) { - reg->flags |= KBASE_REG_SHARE_IN; - } - - if (flags & BASE_MEM_TILER_ALIGN_TOP) - reg->flags |= KBASE_REG_TILER_ALIGN_TOP; - - - /* Set up default MEMATTR usage */ - if (!(reg->flags & KBASE_REG_GPU_CACHED)) { - if (kctx->kbdev->mmu_mode->flags & - KBASE_MMU_MODE_HAS_NON_CACHEABLE) { - /* Override shareability, and MEMATTR for uncached */ - reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); - reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); - } else { - dev_warn(kctx->kbdev->dev, - "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); - return -EINVAL; - } - } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && - (reg->flags & KBASE_REG_SHARE_BOTH)) { - reg->flags |= - 
KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); - } else { - reg->flags |= - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); - } - - if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING) - reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; - - if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) - reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; - - return 0; -} - -int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_requested) -{ - int new_page_count __maybe_unused; - size_t nr_left = nr_pages_requested; - int res; - struct kbase_context *kctx; - struct tagged_addr *tp; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); - - if (alloc->reg) { - if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) - goto invalid_request; - } - - kctx = alloc->imported.native.kctx; - - if (nr_pages_requested == 0) - goto done; /*nothing to do*/ - - new_page_count = kbase_atomic_add_pages( - nr_pages_requested, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); - - /* Increase mm counters before we allocate pages so that this - * allocation is visible to the OOM killer */ - kbase_process_page_usage_inc(kctx, nr_pages_requested); - - tp = alloc->pages + alloc->nents; - -#ifdef CONFIG_MALI_2MB_ALLOC - /* Check if we have enough pages requested so we can allocate a large - * page (512 * 4KB = 2MB ) - */ - if (nr_left >= (SZ_2M / SZ_4K)) { - int nr_lp = nr_left / (SZ_2M / SZ_4K); - - res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool, - nr_lp * (SZ_2M / SZ_4K), - tp, - true); - - if (res > 0) { - nr_left -= res; - tp += res; - } - - if (nr_left) { - struct kbase_sub_alloc *sa, *temp_sa; - - spin_lock(&kctx->mem_partials_lock); - - list_for_each_entry_safe(sa, temp_sa, - &kctx->mem_partials, link) { - int pidx = 0; - - while (nr_left) { - pidx = find_next_zero_bit(sa->sub_pages, - SZ_2M / SZ_4K, - pidx); - bitmap_set(sa->sub_pages, pidx, 1); - *tp++ = as_tagged_tag(page_to_phys(sa->page + - pidx), - FROM_PARTIAL); - nr_left--; - - if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) { - /* unlink from partial list when full */ - list_del_init(&sa->link); - break; - } - } - } - spin_unlock(&kctx->mem_partials_lock); - } - - /* only if we actually have a chunk left <512. If more it indicates - * that we couldn't allocate a 2MB above, so no point to retry here. 
- */ - if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { - /* create a new partial and suballocate the rest from it */ - struct page *np = NULL; - - do { - int err; - - np = kbase_mem_pool_alloc(&kctx->lp_mem_pool); - if (np) - break; - err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1); - if (err) - break; - } while (1); - - if (np) { - int i; - struct kbase_sub_alloc *sa; - struct page *p; - - sa = kmalloc(sizeof(*sa), GFP_KERNEL); - if (!sa) { - kbase_mem_pool_free(&kctx->lp_mem_pool, np, false); - goto no_new_partial; - } - - /* store pointers back to the control struct */ - np->lru.next = (void *)sa; - for (p = np; p < np + SZ_2M / SZ_4K; p++) - p->lru.prev = (void *)np; - INIT_LIST_HEAD(&sa->link); - bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); - sa->page = np; - - for (i = 0; i < nr_left; i++) - *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL); - - bitmap_set(sa->sub_pages, 0, nr_left); - nr_left = 0; - - /* expose for later use */ - spin_lock(&kctx->mem_partials_lock); - list_add(&sa->link, &kctx->mem_partials); - spin_unlock(&kctx->mem_partials_lock); - } - } - } -no_new_partial: -#endif - - if (nr_left) { - res = kbase_mem_pool_alloc_pages(&kctx->mem_pool, - nr_left, - tp, - false); - if (res <= 0) - goto alloc_failed; - } - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); - - alloc->nents += nr_pages_requested; -done: - return 0; - -alloc_failed: - /* rollback needed if got one or more 2MB but failed later */ - if (nr_left != nr_pages_requested) { - size_t nr_pages_to_free = nr_pages_requested - nr_left; - - alloc->nents += nr_pages_to_free; - - kbase_process_page_usage_inc(kctx, nr_pages_to_free); - kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - - kbase_free_phy_pages_helper(alloc, nr_pages_to_free); - } - - kbase_process_page_usage_dec(kctx, nr_pages_requested); - kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); - -invalid_request: - return -ENOMEM; -} - -struct tagged_addr *kbase_alloc_phy_pages_helper_locked( - struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, - size_t nr_pages_requested, - struct kbase_sub_alloc **prealloc_sa) -{ - int new_page_count __maybe_unused; - size_t nr_left = nr_pages_requested; - int res; - struct kbase_context *kctx; - struct tagged_addr *tp; - struct tagged_addr *new_pages = NULL; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); - - lockdep_assert_held(&pool->pool_lock); - -#if !defined(CONFIG_MALI_2MB_ALLOC) - WARN_ON(pool->order); -#endif - - if (alloc->reg) { - if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) - goto invalid_request; - } - - kctx = alloc->imported.native.kctx; - - lockdep_assert_held(&kctx->mem_partials_lock); - - if (nr_pages_requested == 0) - goto done; /*nothing to do*/ - - new_page_count = kbase_atomic_add_pages( - nr_pages_requested, &kctx->used_pages); - kbase_atomic_add_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); - - /* Increase mm counters before we allocate pages so that this - * allocation is visible to the OOM killer - */ - kbase_process_page_usage_inc(kctx, nr_pages_requested); - - tp = alloc->pages + alloc->nents; - new_pages = tp; - -#ifdef CONFIG_MALI_2MB_ALLOC - if (pool->order) { - int nr_lp = nr_left / (SZ_2M / SZ_4K); - - res = kbase_mem_pool_alloc_pages_locked(pool, - nr_lp * (SZ_2M / 
SZ_4K), - tp); - - if (res > 0) { - nr_left -= res; - tp += res; - } - - if (nr_left) { - struct kbase_sub_alloc *sa, *temp_sa; - - list_for_each_entry_safe(sa, temp_sa, - &kctx->mem_partials, link) { - int pidx = 0; - - while (nr_left) { - pidx = find_next_zero_bit(sa->sub_pages, - SZ_2M / SZ_4K, - pidx); - bitmap_set(sa->sub_pages, pidx, 1); - *tp++ = as_tagged_tag(page_to_phys( - sa->page + pidx), - FROM_PARTIAL); - nr_left--; - - if (bitmap_full(sa->sub_pages, - SZ_2M / SZ_4K)) { - /* unlink from partial list when - * full - */ - list_del_init(&sa->link); - break; - } - } - } - } - - /* only if we actually have a chunk left <512. If more it - * indicates that we couldn't allocate a 2MB above, so no point - * to retry here. - */ - if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { - /* create a new partial and suballocate the rest from it - */ - struct page *np = NULL; - - np = kbase_mem_pool_alloc_locked(pool); - - if (np) { - int i; - struct kbase_sub_alloc *const sa = *prealloc_sa; - struct page *p; - - /* store pointers back to the control struct */ - np->lru.next = (void *)sa; - for (p = np; p < np + SZ_2M / SZ_4K; p++) - p->lru.prev = (void *)np; - INIT_LIST_HEAD(&sa->link); - bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); - sa->page = np; - - for (i = 0; i < nr_left; i++) - *tp++ = as_tagged_tag( - page_to_phys(np + i), - FROM_PARTIAL); - - bitmap_set(sa->sub_pages, 0, nr_left); - nr_left = 0; - /* Indicate to user that we'll free this memory - * later. - */ - *prealloc_sa = NULL; - - /* expose for later use */ - list_add(&sa->link, &kctx->mem_partials); - } - } - if (nr_left) - goto alloc_failed; - } else { -#endif - res = kbase_mem_pool_alloc_pages_locked(pool, - nr_left, - tp); - if (res <= 0) - goto alloc_failed; -#ifdef CONFIG_MALI_2MB_ALLOC - } -#endif - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); - - alloc->nents += nr_pages_requested; -done: - return new_pages; - -alloc_failed: - /* rollback needed if got one or more 2MB but failed later */ - if (nr_left != nr_pages_requested) { - size_t nr_pages_to_free = nr_pages_requested - nr_left; - - struct tagged_addr *start_free = alloc->pages + alloc->nents; - -#ifdef CONFIG_MALI_2MB_ALLOC - if (pool->order) { - while (nr_pages_to_free) { - if (is_huge_head(*start_free)) { - kbase_mem_pool_free_pages_locked( - pool, 512, - start_free, - false, /* not dirty */ - true); /* return to pool */ - nr_pages_to_free -= 512; - start_free += 512; - } else if (is_partial(*start_free)) { - free_partial_locked(kctx, pool, - *start_free); - nr_pages_to_free--; - start_free++; - } - } - } else { -#endif - kbase_mem_pool_free_pages_locked(pool, - nr_pages_to_free, - start_free, - false, /* not dirty */ - true); /* return to pool */ -#ifdef CONFIG_MALI_2MB_ALLOC - } -#endif - } - - kbase_process_page_usage_dec(kctx, nr_pages_requested); - kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); - -invalid_request: - return NULL; -} - -static void free_partial(struct kbase_context *kctx, struct tagged_addr tp) -{ - struct page *p, *head_page; - struct kbase_sub_alloc *sa; - - p = as_page(tp); - head_page = (struct page *)p->lru.prev; - sa = (struct kbase_sub_alloc *)head_page->lru.next; - spin_lock(&kctx->mem_partials_lock); - clear_bit(p - head_page, sa->sub_pages); - if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { - list_del(&sa->link); - kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true); - kfree(sa); - } else if 
(bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == - SZ_2M / SZ_4K - 1) { - /* expose the partial again */ - list_add(&sa->link, &kctx->mem_partials); - } - spin_unlock(&kctx->mem_partials_lock); -} - -int kbase_free_phy_pages_helper( - struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_to_free) -{ - struct kbase_context *kctx = alloc->imported.native.kctx; - bool syncback; - bool reclaimed = (alloc->evicted != 0); - struct tagged_addr *start_free; - int new_page_count __maybe_unused; - size_t freed = 0; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); - KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); - - /* early out if nothing to do */ - if (0 == nr_pages_to_free) - return 0; - - start_free = alloc->pages + alloc->nents - nr_pages_to_free; - - syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - - /* pad start_free to a valid start location */ - while (nr_pages_to_free && is_huge(*start_free) && - !is_huge_head(*start_free)) { - nr_pages_to_free--; - start_free++; - } - - while (nr_pages_to_free) { - if (is_huge_head(*start_free)) { - /* This is a 2MB entry, so free all the 512 pages that - * it points to - */ - kbase_mem_pool_free_pages(&kctx->lp_mem_pool, - 512, - start_free, - syncback, - reclaimed); - nr_pages_to_free -= 512; - start_free += 512; - freed += 512; - } else if (is_partial(*start_free)) { - free_partial(kctx, *start_free); - nr_pages_to_free--; - start_free++; - freed++; - } else { - struct tagged_addr *local_end_free; - - local_end_free = start_free; - while (nr_pages_to_free && - !is_huge(*local_end_free) && - !is_partial(*local_end_free)) { - local_end_free++; - nr_pages_to_free--; - } - kbase_mem_pool_free_pages(&kctx->mem_pool, - local_end_free - start_free, - start_free, - syncback, - reclaimed); - freed += local_end_free - start_free; - start_free += local_end_free - start_free; - } - } - - alloc->nents -= freed; - - /* - * If the allocation was not evicted (i.e. evicted == 0) then - * the page accounting needs to be done. 
- */ - if (!reclaimed) { - kbase_process_page_usage_dec(kctx, freed); - new_page_count = kbase_atomic_sub_pages(freed, - &kctx->used_pages); - kbase_atomic_sub_pages(freed, - &kctx->kbdev->memdev.used_pages); - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); - } - - return 0; -} - -static void free_partial_locked(struct kbase_context *kctx, - struct kbase_mem_pool *pool, struct tagged_addr tp) -{ - struct page *p, *head_page; - struct kbase_sub_alloc *sa; - - lockdep_assert_held(&pool->pool_lock); - lockdep_assert_held(&kctx->mem_partials_lock); - - p = as_page(tp); - head_page = (struct page *)p->lru.prev; - sa = (struct kbase_sub_alloc *)head_page->lru.next; - clear_bit(p - head_page, sa->sub_pages); - if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { - list_del(&sa->link); - kbase_mem_pool_free_locked(pool, head_page, true); - kfree(sa); - } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == - SZ_2M / SZ_4K - 1) { - /* expose the partial again */ - list_add(&sa->link, &kctx->mem_partials); - } -} - -void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, - struct kbase_mem_pool *pool, struct tagged_addr *pages, - size_t nr_pages_to_free) -{ - struct kbase_context *kctx = alloc->imported.native.kctx; - bool syncback; - bool reclaimed = (alloc->evicted != 0); - struct tagged_addr *start_free; - size_t freed = 0; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); - KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); - KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); - - lockdep_assert_held(&pool->pool_lock); - lockdep_assert_held(&kctx->mem_partials_lock); - - /* early out if nothing to do */ - if (!nr_pages_to_free) - return; - - start_free = pages; - - syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - - /* pad start_free to a valid start location */ - while (nr_pages_to_free && is_huge(*start_free) && - !is_huge_head(*start_free)) { - nr_pages_to_free--; - start_free++; - } - - while (nr_pages_to_free) { - if (is_huge_head(*start_free)) { - /* This is a 2MB entry, so free all the 512 pages that - * it points to - */ - WARN_ON(!pool->order); - kbase_mem_pool_free_pages_locked(pool, - 512, - start_free, - syncback, - reclaimed); - nr_pages_to_free -= 512; - start_free += 512; - freed += 512; - } else if (is_partial(*start_free)) { - WARN_ON(!pool->order); - free_partial_locked(kctx, pool, *start_free); - nr_pages_to_free--; - start_free++; - freed++; - } else { - struct tagged_addr *local_end_free; - - WARN_ON(pool->order); - local_end_free = start_free; - while (nr_pages_to_free && - !is_huge(*local_end_free) && - !is_partial(*local_end_free)) { - local_end_free++; - nr_pages_to_free--; - } - kbase_mem_pool_free_pages_locked(pool, - local_end_free - start_free, - start_free, - syncback, - reclaimed); - freed += local_end_free - start_free; - start_free += local_end_free - start_free; - } - } - - alloc->nents -= freed; - - /* - * If the allocation was not evicted (i.e. evicted == 0) then - * the page accounting needs to be done. 
- */ - if (!reclaimed) { - int new_page_count; - - kbase_process_page_usage_dec(kctx, freed); - new_page_count = kbase_atomic_sub_pages(freed, - &kctx->used_pages); - kbase_atomic_sub_pages(freed, - &kctx->kbdev->memdev.used_pages); - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); - } -} - -void kbase_mem_kref_free(struct kref *kref) -{ - struct kbase_mem_phy_alloc *alloc; - - alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); - - switch (alloc->type) { - case KBASE_MEM_TYPE_NATIVE: { - - if (!WARN_ON(!alloc->imported.native.kctx)) { - if (alloc->permanent_map) - kbase_phy_alloc_mapping_term( - alloc->imported.native.kctx, - alloc); - - /* - * The physical allocation must have been removed from - * the eviction list before trying to free it. - */ - mutex_lock( - &alloc->imported.native.kctx->jit_evict_lock); - WARN_ON(!list_empty(&alloc->evict_node)); - mutex_unlock( - &alloc->imported.native.kctx->jit_evict_lock); - - kbase_process_page_usage_dec( - alloc->imported.native.kctx, - alloc->imported.native.nr_struct_pages); - } - kbase_free_phy_pages_helper(alloc, alloc->nents); - break; - } - case KBASE_MEM_TYPE_ALIAS: { - /* just call put on the underlying phy allocs */ - size_t i; - struct kbase_aliased *aliased; - - aliased = alloc->imported.alias.aliased; - if (aliased) { - for (i = 0; i < alloc->imported.alias.nents; i++) - if (aliased[i].alloc) - kbase_mem_phy_alloc_put(aliased[i].alloc); - vfree(aliased); - } - break; - } - case KBASE_MEM_TYPE_RAW: - /* raw pages, external cleanup */ - break; -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: - dma_buf_detach(alloc->imported.umm.dma_buf, - alloc->imported.umm.dma_attachment); - dma_buf_put(alloc->imported.umm.dma_buf); - break; -#endif - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - if (alloc->imported.user_buf.mm) - mmdrop(alloc->imported.user_buf.mm); - if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) - vfree(alloc->imported.user_buf.pages); - else - kfree(alloc->imported.user_buf.pages); - break; - default: - WARN(1, "Unexecpted free of type %d\n", alloc->type); - break; - } - - /* Free based on allocation type */ - if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) - vfree(alloc); - else - kfree(alloc); -} - -KBASE_EXPORT_TEST_API(kbase_mem_kref_free); - -int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) -{ - KBASE_DEBUG_ASSERT(NULL != reg); - KBASE_DEBUG_ASSERT(vsize > 0); - - /* validate user provided arguments */ - if (size > vsize || vsize > reg->nr_pages) - goto out_term; - - /* Prevent vsize*sizeof from wrapping around. - * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. - */ - if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) - goto out_term; - - KBASE_DEBUG_ASSERT(0 != vsize); - - if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) - goto out_term; - - reg->cpu_alloc->reg = reg; - if (reg->cpu_alloc != reg->gpu_alloc) { - if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) - goto out_rollback; - reg->gpu_alloc->reg = reg; - } - - return 0; - -out_rollback: - kbase_free_phy_pages_helper(reg->cpu_alloc, size); -out_term: - return -1; -} - -KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); - -bool kbase_check_alloc_flags(unsigned long flags) -{ - /* Only known input flags should be set. 
*/ - if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) - return false; - - /* At least one flag should be set */ - if (flags == 0) - return false; - - /* Either the GPU or CPU must be reading from the allocated memory */ - if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) - return false; - - /* Either the GPU or CPU must be writing to the allocated memory */ - if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) - return false; - - /* GPU executable memory cannot: - * - Be written by the GPU - * - Be grown on GPU page fault - * - Have the top of its initial commit aligned to 'extent' */ - if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_TILER_ALIGN_TOP))) - return false; - - /* To have an allocation lie within a 4GB chunk is required only for - * TLS memory, which will never be used to contain executable code - * and also used for Tiler heap. - */ - if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) - return false; - - /* GPU should have at least read or write access otherwise there is no - reason for allocating. */ - if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) - return false; - - /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ - if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) - return false; - - /* Should not combine BASE_MEM_COHERENT_LOCAL with - * BASE_MEM_COHERENT_SYSTEM */ - if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == - (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) - return false; - - return true; -} - -bool kbase_check_import_flags(unsigned long flags) -{ - /* Only known input flags should be set. */ - if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) - return false; - - /* At least one flag should be set */ - if (flags == 0) - return false; - - /* Imported memory cannot be GPU executable */ - if (flags & BASE_MEM_PROT_GPU_EX) - return false; - - /* Imported memory cannot grow on page fault */ - if (flags & BASE_MEM_GROW_ON_GPF) - return false; - - /* Imported memory cannot be aligned to the end of its initial commit */ - if (flags & BASE_MEM_TILER_ALIGN_TOP) - return false; - - /* GPU should have at least read or write access otherwise there is no - reason for importing. 
*/ - if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) - return false; - - /* Secure memory cannot be read by the CPU */ - if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD)) - return false; - - return true; -} - -int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 large_extent) -{ - struct device *dev = kctx->kbdev->dev; - int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; - u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; - struct kbase_va_region test_reg; - - /* kbase_va_region's extent member can be of variable size, so check against that type */ - test_reg.extent = large_extent; - -#define KBASE_MSG_PRE "GPU allocation attempted with " - - if (0 == va_pages) { - dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); - return -EINVAL; - } - - if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { - dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", - (unsigned long long)va_pages); - return -ENOMEM; - } - - /* Note: commit_pages is checked against va_pages during - * kbase_alloc_phy_pages() */ - - /* Limit GPU executable allocs to GPU PC size */ - if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", - (unsigned long long)va_pages, - (unsigned long long)gpu_pc_pages_max); - - return -EINVAL; - } - - if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent == 0) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); - return -EINVAL; - } - - if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent != 0) { - dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n"); - return -EINVAL; - } - - /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ - if (flags & BASE_MEM_TILER_ALIGN_TOP) { -#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " - unsigned long small_extent; - - if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld", - (unsigned long long)large_extent, - BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES); - return -EINVAL; - } - /* For use with is_power_of_2, which takes unsigned long, so - * must ensure e.g. 
on 32-bit kernel it'll fit in that type */ - small_extent = (unsigned long)large_extent; - - if (!is_power_of_2(small_extent)) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2", - small_extent); - return -EINVAL; - } - - if (commit_pages > large_extent) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld", - (unsigned long)commit_pages, - (unsigned long)large_extent); - return -EINVAL; - } -#undef KBASE_MSG_PRE_FLAG - } - - if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && - (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", - (unsigned long long)va_pages); - return -EINVAL; - } - - return 0; -#undef KBASE_MSG_PRE -} - -/** - * @brief Acquire the per-context region list lock - */ -void kbase_gpu_vm_lock(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(kctx != NULL); - mutex_lock(&kctx->reg_lock); -} - -KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); - -/** - * @brief Release the per-context region list lock - */ -void kbase_gpu_vm_unlock(struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(kctx != NULL); - mutex_unlock(&kctx->reg_lock); -} - -KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); - -#ifdef CONFIG_DEBUG_FS -struct kbase_jit_debugfs_data { - int (*func)(struct kbase_jit_debugfs_data *); - struct mutex lock; - struct kbase_context *kctx; - u64 active_value; - u64 pool_value; - u64 destroy_value; - char buffer[50]; -}; - -static int kbase_jit_debugfs_common_open(struct inode *inode, - struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) -{ - struct kbase_jit_debugfs_data *data; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->func = func; - mutex_init(&data->lock); - data->kctx = (struct kbase_context *) inode->i_private; - - file->private_data = data; - - return nonseekable_open(inode, file); -} - -static ssize_t kbase_jit_debugfs_common_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) -{ - struct kbase_jit_debugfs_data *data; - size_t size; - int ret; - - data = (struct kbase_jit_debugfs_data *) file->private_data; - mutex_lock(&data->lock); - - if (*ppos) { - size = strnlen(data->buffer, sizeof(data->buffer)); - } else { - if (!data->func) { - ret = -EACCES; - goto out_unlock; - } - - if (data->func(data)) { - ret = -EACCES; - goto out_unlock; - } - - size = scnprintf(data->buffer, sizeof(data->buffer), - "%llu,%llu,%llu", data->active_value, - data->pool_value, data->destroy_value); - } - - ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); - -out_unlock: - mutex_unlock(&data->lock); - return ret; -} - -static int kbase_jit_debugfs_common_release(struct inode *inode, - struct file *file) -{ - kfree(file->private_data); - return 0; -} - -#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - return kbase_jit_debugfs_common_open(inode, file, __func); \ -} \ -static const struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = kbase_jit_debugfs_common_release, \ - .read = kbase_jit_debugfs_common_read, \ - .write = NULL, \ - .llseek = generic_file_llseek, \ -} - -static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) -{ - struct kbase_context *kctx = data->kctx; - struct list_head *tmp; - - mutex_lock(&kctx->jit_evict_lock); - list_for_each(tmp, &kctx->jit_active_head) { - data->active_value++; - } - - 
list_for_each(tmp, &kctx->jit_pool_head) { - data->pool_value++; - } - - list_for_each(tmp, &kctx->jit_destroy_head) { - data->destroy_value++; - } - mutex_unlock(&kctx->jit_evict_lock); - - return 0; -} -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, - kbase_jit_debugfs_count_get); - -static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) -{ - struct kbase_context *kctx = data->kctx; - struct kbase_va_region *reg; - - mutex_lock(&kctx->jit_evict_lock); - list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { - data->active_value += reg->nr_pages; - } - - list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { - data->pool_value += reg->nr_pages; - } - - list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { - data->destroy_value += reg->nr_pages; - } - mutex_unlock(&kctx->jit_evict_lock); - - return 0; -} -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, - kbase_jit_debugfs_vm_get); - -static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) -{ - struct kbase_context *kctx = data->kctx; - struct kbase_va_region *reg; - - mutex_lock(&kctx->jit_evict_lock); - list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { - data->active_value += reg->gpu_alloc->nents; - } - - list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { - data->pool_value += reg->gpu_alloc->nents; - } - - list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { - data->destroy_value += reg->gpu_alloc->nents; - } - mutex_unlock(&kctx->jit_evict_lock); - - return 0; -} -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, - kbase_jit_debugfs_phys_get); - -void kbase_jit_debugfs_init(struct kbase_context *kctx) -{ - /* Debugfs entry for getting the number of JIT allocations. */ - debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_count_fops); - - /* - * Debugfs entry for getting the total number of virtual pages - * used by JIT allocations. - */ - debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_vm_fops); - - /* - * Debugfs entry for getting the number of physical pages used - * by JIT allocations. - */ - debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_phys_fops); -} -#endif /* CONFIG_DEBUG_FS */ - -/** - * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations - * @work: Work item - * - * This function does the work of freeing JIT allocations whose physical - * backing has been released. 
- */ -static void kbase_jit_destroy_worker(struct work_struct *work) -{ - struct kbase_context *kctx; - struct kbase_va_region *reg; - - kctx = container_of(work, struct kbase_context, jit_work); - do { - mutex_lock(&kctx->jit_evict_lock); - if (list_empty(&kctx->jit_destroy_head)) { - mutex_unlock(&kctx->jit_evict_lock); - break; - } - - reg = list_first_entry(&kctx->jit_destroy_head, - struct kbase_va_region, jit_node); - - list_del(®->jit_node); - mutex_unlock(&kctx->jit_evict_lock); - - kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_JIT; - kbase_mem_free_region(kctx, reg); - kbase_gpu_vm_unlock(kctx); - } while (1); -} - -int kbase_jit_init(struct kbase_context *kctx) -{ - mutex_lock(&kctx->jit_evict_lock); - INIT_LIST_HEAD(&kctx->jit_active_head); - INIT_LIST_HEAD(&kctx->jit_pool_head); - INIT_LIST_HEAD(&kctx->jit_destroy_head); - INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); - - INIT_LIST_HEAD(&kctx->jit_pending_alloc); - INIT_LIST_HEAD(&kctx->jit_atoms_head); - mutex_unlock(&kctx->jit_evict_lock); - - kctx->jit_max_allocations = 0; - kctx->jit_current_allocations = 0; - kctx->trim_level = 0; - - return 0; -} - -/* Check if the allocation from JIT pool is of the same size as the new JIT - * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets - * the alignment requirements. - */ -static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, - struct kbase_va_region *walker, struct base_jit_alloc_info *info) -{ - bool meet_reqs = true; - - if (walker->nr_pages != info->va_pages) - meet_reqs = false; - else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { - size_t align = info->extent; - size_t align_mask = align - 1; - - if ((walker->start_pfn + info->commit_pages) & align_mask) - meet_reqs = false; - } - - return meet_reqs; -} - -static int kbase_jit_grow(struct kbase_context *kctx, - struct base_jit_alloc_info *info, struct kbase_va_region *reg) -{ - size_t delta; - size_t pages_required; - size_t old_size; - struct kbase_mem_pool *pool; - int ret = -ENOMEM; - struct tagged_addr *gpu_pages; - struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; - int i; - - if (info->commit_pages > reg->nr_pages) { - /* Attempted to grow larger than maximum size */ - return -EINVAL; - } - - kbase_gpu_vm_lock(kctx); - - /* Make the physical backing no longer reclaimable */ - if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) - goto update_failed; - - if (reg->gpu_alloc->nents >= info->commit_pages) - goto done; - - /* Grow the backing */ - old_size = reg->gpu_alloc->nents; - - /* Allocate some more pages */ - delta = info->commit_pages - reg->gpu_alloc->nents; - pages_required = delta; - -#ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), - GFP_KERNEL); - if (!prealloc_sas[i]) - goto update_failed; - } - - if (pages_required >= (SZ_2M / SZ_4K)) { - pool = &kctx->lp_mem_pool; - /* Round up to number of 2 MB pages required */ - pages_required += ((SZ_2M / SZ_4K) - 1); - pages_required /= (SZ_2M / SZ_4K); - } else { -#endif - pool = &kctx->mem_pool; -#ifdef CONFIG_MALI_2MB_ALLOC - } -#endif - - if (reg->cpu_alloc != reg->gpu_alloc) - pages_required *= 2; - - spin_lock(&kctx->mem_partials_lock); - kbase_mem_pool_lock(pool); - - /* As we can not allocate memory from the kernel with the vm_lock held, - * grow the pool to the required size with the lock dropped. 
We hold the - * pool lock to prevent another thread from allocating from the pool - * between the grow and allocation. - */ - while (kbase_mem_pool_size(pool) < pages_required) { - int pool_delta = pages_required - kbase_mem_pool_size(pool); - - kbase_mem_pool_unlock(pool); - spin_unlock(&kctx->mem_partials_lock); - kbase_gpu_vm_unlock(kctx); - - if (kbase_mem_pool_grow(pool, pool_delta)) - goto update_failed_unlocked; - - kbase_gpu_vm_lock(kctx); - spin_lock(&kctx->mem_partials_lock); - kbase_mem_pool_lock(pool); - } - - gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, - delta, &prealloc_sas[0]); - if (!gpu_pages) { - kbase_mem_pool_unlock(pool); - spin_unlock(&kctx->mem_partials_lock); - goto update_failed; - } - - if (reg->cpu_alloc != reg->gpu_alloc) { - struct tagged_addr *cpu_pages; - - cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, - pool, delta, &prealloc_sas[1]); - if (!cpu_pages) { - kbase_free_phy_pages_helper_locked(reg->gpu_alloc, - pool, gpu_pages, delta); - kbase_mem_pool_unlock(pool); - spin_unlock(&kctx->mem_partials_lock); - goto update_failed; - } - } - kbase_mem_pool_unlock(pool); - spin_unlock(&kctx->mem_partials_lock); - - ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, - old_size); - /* - * The grow failed so put the allocation back in the - * pool and return failure. - */ - if (ret) - goto update_failed; - -done: - ret = 0; - - /* Update attributes of JIT allocation taken from the pool */ - reg->initial_commit = info->commit_pages; - reg->extent = info->extent; - -update_failed: - kbase_gpu_vm_unlock(kctx); -update_failed_unlocked: - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) - kfree(prealloc_sas[i]); - - return ret; -} - -static void trace_jit_stats(struct kbase_context *kctx, - u32 bin_id, u32 max_allocations) -{ - const u32 alloc_count = - kctx->jit_current_allocations_per_bin[bin_id]; - - struct kbase_va_region *walker; - u32 va_pages = 0; - u32 ph_pages = 0; - - mutex_lock(&kctx->jit_evict_lock); - list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { - if (walker->jit_bin_id != bin_id) - continue; - - va_pages += walker->nr_pages; - ph_pages += walker->gpu_alloc->nents; - } - mutex_unlock(&kctx->jit_evict_lock); - - KBASE_TLSTREAM_AUX_JIT_STATS(kctx->id, bin_id, max_allocations, - alloc_count, va_pages, ph_pages); -} - -struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - struct base_jit_alloc_info *info) -{ - struct kbase_va_region *reg = NULL; - - if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { - /* Too many current allocations */ - return NULL; - } - if (info->max_allocations > 0 && - kctx->jit_current_allocations_per_bin[info->bin_id] >= - info->max_allocations) { - /* Too many current allocations in this bin */ - return NULL; - } - - mutex_lock(&kctx->jit_evict_lock); - - /* - * Scan the pool for an existing allocation which meets our - * requirements and remove it. - */ - if (info->usage_id != 0) { - /* First scan for an allocation with the same usage ID */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_usage_id == info->usage_id && - walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. 
- */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match */ - if (current_diff == 0) - break; - } - } - } - - if (!reg) { - /* No allocation with the same usage ID, or usage IDs not in - * use. Search for an allocation we can reuse. - */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match, so stop - * looking. - */ - if (current_diff == 0) - break; - } - } - } - - if (reg) { - /* - * Remove the found region from the pool and add it to the - * active list. - */ - list_move(®->jit_node, &kctx->jit_active_head); - - /* - * Remove the allocation from the eviction list as it's no - * longer eligible for eviction. This must be done before - * dropping the jit_evict_lock - */ - list_del_init(®->gpu_alloc->evict_node); - mutex_unlock(&kctx->jit_evict_lock); - - if (kbase_jit_grow(kctx, info, reg) < 0) { - /* - * An update to an allocation from the pool failed, - * chances are slim a new allocation would fair any - * better so return the allocation to the pool and - * return the function with failure. 
- */ - goto update_failed_unlocked; - } - } else { - /* No suitable JIT allocation was found so create a new one */ - u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | - BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_COHERENT_LOCAL; - u64 gpu_addr; - - mutex_unlock(&kctx->jit_evict_lock); - - if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) - flags |= BASE_MEM_TILER_ALIGN_TOP; - - reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, - info->extent, &flags, &gpu_addr); - if (!reg) - goto out_unlocked; - - reg->flags |= KBASE_REG_JIT; - - mutex_lock(&kctx->jit_evict_lock); - list_add(®->jit_node, &kctx->jit_active_head); - mutex_unlock(&kctx->jit_evict_lock); - } - - kctx->jit_current_allocations++; - kctx->jit_current_allocations_per_bin[info->bin_id]++; - - trace_jit_stats(kctx, info->bin_id, info->max_allocations); - - reg->jit_usage_id = info->usage_id; - reg->jit_bin_id = info->bin_id; - - return reg; - -update_failed_unlocked: - mutex_lock(&kctx->jit_evict_lock); - list_move(®->jit_node, &kctx->jit_pool_head); - mutex_unlock(&kctx->jit_evict_lock); -out_unlocked: - return NULL; -} - -void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - u64 old_pages; - - /* Get current size of JIT region */ - old_pages = kbase_reg_current_backed_size(reg); - if (reg->initial_commit < old_pages) { - /* Free trim_level % of region, but don't go below initial - * commit size - */ - u64 new_size = MAX(reg->initial_commit, - div_u64(old_pages * (100 - kctx->trim_level), 100)); - u64 delta = old_pages - new_size; - - if (delta) { - kbase_mem_shrink_cpu_mapping(kctx, reg, old_pages-delta, - old_pages); - kbase_mem_shrink_gpu_mapping(kctx, reg, old_pages-delta, - old_pages); - - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, - delta); - } - } - - kctx->jit_current_allocations--; - kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; - - trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); - - kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); - - kbase_gpu_vm_lock(kctx); - reg->flags |= KBASE_REG_DONT_NEED; - kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); - kbase_gpu_vm_unlock(kctx); - - /* - * Add the allocation to the eviction list and the jit pool, after this - * point the shrink can reclaim it, or it may be reused. - */ - mutex_lock(&kctx->jit_evict_lock); - - /* This allocation can't already be on a list. */ - WARN_ON(!list_empty(®->gpu_alloc->evict_node)); - list_add(®->gpu_alloc->evict_node, &kctx->evict_list); - - list_move(®->jit_node, &kctx->jit_pool_head); - - mutex_unlock(&kctx->jit_evict_lock); -} - -void kbase_jit_backing_lost(struct kbase_va_region *reg) -{ - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); - - if (WARN_ON(!kctx)) - return; - - lockdep_assert_held(&kctx->jit_evict_lock); - - /* - * JIT allocations will always be on a list, if the region - * is not on a list then it's not a JIT allocation. - */ - if (list_empty(®->jit_node)) - return; - - /* - * Freeing the allocation requires locks we might not be able - * to take now, so move the allocation to the free list and kick - * the worker which will do the freeing. 
- */ - list_move(®->jit_node, &kctx->jit_destroy_head); - - schedule_work(&kctx->jit_work); -} - -bool kbase_jit_evict(struct kbase_context *kctx) -{ - struct kbase_va_region *reg = NULL; - - lockdep_assert_held(&kctx->reg_lock); - - /* Free the oldest allocation from the pool */ - mutex_lock(&kctx->jit_evict_lock); - if (!list_empty(&kctx->jit_pool_head)) { - reg = list_entry(kctx->jit_pool_head.prev, - struct kbase_va_region, jit_node); - list_del(®->jit_node); - list_del_init(®->gpu_alloc->evict_node); - } - mutex_unlock(&kctx->jit_evict_lock); - - if (reg) { - reg->flags &= ~KBASE_REG_JIT; - kbase_mem_free_region(kctx, reg); - } - - return (reg != NULL); -} - -void kbase_jit_term(struct kbase_context *kctx) -{ - struct kbase_va_region *walker; - - /* Free all allocations for this context */ - - kbase_gpu_vm_lock(kctx); - mutex_lock(&kctx->jit_evict_lock); - /* Free all allocations from the pool */ - while (!list_empty(&kctx->jit_pool_head)) { - walker = list_first_entry(&kctx->jit_pool_head, - struct kbase_va_region, jit_node); - list_del(&walker->jit_node); - list_del_init(&walker->gpu_alloc->evict_node); - mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_JIT; - kbase_mem_free_region(kctx, walker); - mutex_lock(&kctx->jit_evict_lock); - } - - /* Free all allocations from active list */ - while (!list_empty(&kctx->jit_active_head)) { - walker = list_first_entry(&kctx->jit_active_head, - struct kbase_va_region, jit_node); - list_del(&walker->jit_node); - list_del_init(&walker->gpu_alloc->evict_node); - mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_JIT; - kbase_mem_free_region(kctx, walker); - mutex_lock(&kctx->jit_evict_lock); - } - mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); - - /* - * Flush the freeing of allocations whose backing has been freed - * (i.e. everything in jit_destroy_head). - */ - cancel_work_sync(&kctx->jit_work); -} - -bool kbase_has_exec_va_zone(struct kbase_context *kctx) -{ - bool has_exec_va_zone; - - kbase_gpu_vm_lock(kctx); - has_exec_va_zone = (kctx->exec_va_start != U64_MAX); - kbase_gpu_vm_unlock(kctx); - - return has_exec_va_zone; -} - -static int kbase_jd_user_buf_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - long pinned_pages; - struct kbase_mem_phy_alloc *alloc; - struct page **pages; - struct tagged_addr *pa; - long i; - int err = -ENOMEM; - unsigned long address; - struct mm_struct *mm; - struct device *dev; - unsigned long offset; - unsigned long local_size; - unsigned long gwt_mask = ~0; - - alloc = reg->gpu_alloc; - pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; - mm = alloc->imported.user_buf.mm; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - - pages = alloc->imported.user_buf.pages; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) - pinned_pages = get_user_pages(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, - 0, pages, NULL); -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) - pinned_pages = get_user_pages_remote(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, - 0, pages, NULL); -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) - pinned_pages = get_user_pages_remote(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, - pages, NULL); -#else - pinned_pages = get_user_pages_remote(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, - pages, NULL, NULL); -#endif - - if (pinned_pages <= 0) - return pinned_pages; - - if (pinned_pages != alloc->imported.user_buf.nr_pages) { - for (i = 0; i < pinned_pages; i++) - put_page(pages[i]); - return -ENOMEM; - } - - dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; - - for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) - goto unwind; - - alloc->imported.user_buf.dma_addrs[i] = dma_addr; - pa[i] = as_tagged(page_to_phys(pages[i])); - - local_size -= min; - offset = 0; - } - - alloc->nents = pinned_pages; -#ifdef CONFIG_MALI_CINSTR_GWT - if (kctx->gwt_enabled) - gwt_mask = ~KBASE_REG_GPU_WR; -#endif - - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr); - if (err == 0) - return 0; - - alloc->nents = 0; - /* fall down */ -unwind: - while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - } - - while (++i < pinned_pages) { - put_page(pages[i]); - pages[i] = NULL; - } - - return err; -} - -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) -{ - long i; - struct page **pages; - unsigned long size = alloc->imported.user_buf.size; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - pages = alloc->imported.user_buf.pages; - for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; - dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, - DMA_BIDIRECTIONAL); - if (writeable) - set_page_dirty_lock(pages[i]); - put_page(pages[i]); - pages[i] = NULL; - - size -= local_size; - } - alloc->nents = 0; -} - -#ifdef CONFIG_DMA_SHARED_BUFFER -static int kbase_jd_umm_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - struct sg_table *sgt; - struct scatterlist *s; - int i; - struct tagged_addr *pa; - int err; - size_t count = 0; - struct kbase_mem_phy_alloc *alloc; - unsigned long gwt_mask = ~0; - - alloc = reg->gpu_alloc; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); - KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); - sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, - DMA_BIDIRECTIONAL); - - if (IS_ERR_OR_NULL(sgt)) - return -EINVAL; - - /* save for later */ - alloc->imported.umm.sgt = sgt; - - pa = kbase_get_gpu_phy_pages(reg); - KBASE_DEBUG_ASSERT(pa); - - for_each_sg(sgt->sgl, s, sgt->nents, i) { - size_t j, pages = PFN_UP(sg_dma_len(s)); - - WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), - "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", - sg_dma_len(s)); - - WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), - "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", - (unsigned long long) sg_dma_address(s)); - - for (j = 0; (j < pages) && (count < reg->nr_pages); j++, - count++) - *pa++ = as_tagged(sg_dma_address(s) + - (j << PAGE_SHIFT)); - WARN_ONCE(j < pages, - "sg list from dma_buf_map_attachment > 
dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size); - } - - if (!(reg->flags & KBASE_REG_IMPORT_PAD) && - WARN_ONCE(count < reg->nr_pages, - "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size)) { - err = -EINVAL; - goto err_unmap_attachment; - } - - /* Update nents as we now have pages to map */ - alloc->nents = reg->nr_pages; - -#ifdef CONFIG_MALI_CINSTR_GWT - if (kctx->gwt_enabled) - gwt_mask = ~KBASE_REG_GPU_WR; -#endif - - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - count, - (reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) & - gwt_mask, - kctx->as_nr); - if (err) - goto err_unmap_attachment; - - if (reg->flags & KBASE_REG_IMPORT_PAD) { - err = kbase_mmu_insert_single_page(kctx, - reg->start_pfn + count, - kctx->aliasing_sink_page, - reg->nr_pages - count, - (reg->flags | KBASE_REG_GPU_RD) & - ~KBASE_REG_GPU_WR); - if (err) - goto err_teardown_orig_pages; - } - - return 0; - -err_teardown_orig_pages: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - count, kctx->as_nr); -err_unmap_attachment: - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - - return err; -} - -static void kbase_jd_umm_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(alloc); - KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); - KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - alloc->nents = 0; -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ - -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm) -{ - int err; - - /* decide what needs to happen for this resource */ - switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) - goto exit; - - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { - err = kbase_jd_user_buf_map(kctx, reg); - if (err) { - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto exit; - } - } - } - break; -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - reg->gpu_alloc->imported.umm.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { - err = kbase_jd_umm_map(kctx, reg); - if (err) { - reg->gpu_alloc->imported.umm.current_mapping_usage_count--; - goto exit; - } - } - break; - } -#endif - default: - goto exit; - } - - return kbase_mem_phy_alloc_get(reg->gpu_alloc); -exit: - return NULL; -} - -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) -{ - switch (alloc->type) { -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - if (reg && reg->gpu_alloc == alloc) { - int err; - - err = kbase_mmu_teardown_pages( - kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - alloc->nents, - kctx->as_nr); - WARN_ON(err); - } - - kbase_jd_umm_unmap(kctx, alloc); - } - } - break; -#endif /* CONFIG_DMA_SHARED_BUFFER */ - case 
KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - alloc->imported.user_buf.current_mapping_usage_count--; - - if (0 == alloc->imported.user_buf.current_mapping_usage_count) { - bool writeable = true; - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - kctx->kbdev, - &kctx->mmu, - reg->start_pfn, - kbase_reg_current_backed_size(reg), - kctx->as_nr); - - if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) - writeable = false; - - kbase_jd_user_buf_unmap(kctx, alloc, writeable); - } - } - break; - default: - break; - } - kbase_mem_phy_alloc_put(alloc); -} - -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( - struct kbase_context *kctx, u64 gpu_addr) -{ - struct kbase_ctx_ext_res_meta *meta = NULL; - struct kbase_ctx_ext_res_meta *walker; - - lockdep_assert_held(&kctx->reg_lock); - - /* - * Walk the per context external resource metadata list for the - * metadata which matches the region which is being acquired. - */ - list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (walker->gpu_addr == gpu_addr) { - meta = walker; - break; - } - } - - /* No metadata exists so create one. */ - if (!meta) { - struct kbase_va_region *reg; - - /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) - goto failed; - - /* Allocate the metadata object */ - meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - goto failed; - - /* - * Fill in the metadata object and acquire a reference - * for the physical resource. - */ - meta->alloc = kbase_map_external_resource(kctx, reg, NULL); - - if (!meta->alloc) - goto fail_map; - - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; - - list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); - } - - return meta; - -fail_map: - kfree(meta); -failed: - return NULL; -} - -bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) -{ - struct kbase_ctx_ext_res_meta *walker; - struct kbase_va_region *reg; - - lockdep_assert_held(&kctx->reg_lock); - - /* Search of the metadata if one isn't provided. */ - if (!meta) { - /* - * Walk the per context external resource metadata list for the - * metadata which matches the region which is being released. - */ - list_for_each_entry(walker, &kctx->ext_res_meta_head, - ext_res_node) { - if (walker->gpu_addr == gpu_addr) { - meta = walker; - break; - } - } - } - - /* No metadata so just return. */ - if (!meta) - return false; - - /* Drop the physical memory reference and free the metadata. */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); - - kbase_unmap_external_resource(kctx, reg, meta->alloc); - list_del(&meta->ext_res_node); - kfree(meta); - - return true; -} - -int kbase_sticky_resource_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->ext_res_meta_head); - - return 0; -} - -void kbase_sticky_resource_term(struct kbase_context *kctx) -{ - struct kbase_ctx_ext_res_meta *walker; - - lockdep_assert_held(&kctx->reg_lock); - - /* - * Free any sticky resources which haven't been unmapped. - * - * Note: - * We don't care about refcounts at this point as no future - * references to the meta data will be made. - * Region termination would find these if we didn't free them - * here, but it's more efficient if we do the clean up here. 
- */ - while (!list_empty(&kctx->ext_res_meta_head)) { - walker = list_first_entry(&kctx->ext_res_meta_head, - struct kbase_ctx_ext_res_meta, ext_res_node); - - kbase_sticky_resource_release(kctx, walker, 0); - } -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem.h deleted file mode 100755 index a873bb1d08f5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem.h +++ /dev/null @@ -1,1488 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_mem.h - * Base kernel memory APIs - */ - -#ifndef _KBASE_MEM_H_ -#define _KBASE_MEM_H_ - -#ifndef _KBASE_H_ -#error "Don't include this file directly, use mali_kbase.h instead" -#endif - -#include -#include "mali_base_kernel.h" -#include -#include "mali_kbase_pm.h" -#include "mali_kbase_defs.h" -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include "mali_kbase_gator.h" -#endif -/* Required for kbase_mem_evictable_unmake */ -#include "mali_kbase_mem_linux.h" - -static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, - int pages); - -/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ - -/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. -The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and -page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table -updates and generates duplicate page faults as the page table information used by the MMU is not valid. 
*/ -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ - -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ - -/* This must always be a power of 2 */ -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) -#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) -/** - * A CPU mapping - */ -struct kbase_cpu_mapping { - struct list_head mappings_list; - struct kbase_mem_phy_alloc *alloc; - struct kbase_context *kctx; - struct kbase_va_region *region; - int count; - int free_on_close; -}; - -enum kbase_memory_type { - KBASE_MEM_TYPE_NATIVE, - KBASE_MEM_TYPE_IMPORTED_UMM, - KBASE_MEM_TYPE_IMPORTED_USER_BUF, - KBASE_MEM_TYPE_ALIAS, - KBASE_MEM_TYPE_RAW -}; - -/* internal structure, mirroring base_mem_aliasing_info, - * but with alloc instead of a gpu va (handle) */ -struct kbase_aliased { - struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ - u64 offset; /* in pages */ - u64 length; /* in pages */ -}; - -/** - * @brief Physical pages tracking object properties - */ -#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0) -#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1) - -/* physical pages tracking object. - * Set up to track N pages. - * N not stored here, the creator holds that info. - * This object only tracks how many elements are actually valid (present). - * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not - * shared with another region or client. CPU mappings are OK to exist when changing, as - * long as the tracked mappings objects are updated as part of the change. - */ -struct kbase_mem_phy_alloc { - struct kref kref; /* number of users of this alloc */ - atomic_t gpu_mappings; - size_t nents; /* 0..N */ - struct tagged_addr *pages; /* N elements, only 0..nents are valid */ - - /* kbase_cpu_mappings */ - struct list_head mappings; - - /* Node used to store this allocation on the eviction list */ - struct list_head evict_node; - /* Physical backing size when the pages where evicted */ - size_t evicted; - /* - * Back reference to the region structure which created this - * allocation, or NULL if it has been freed. - */ - struct kbase_va_region *reg; - - /* type of buffer */ - enum kbase_memory_type type; - - /* Kernel side mapping of the alloc, shall never be referred directly. - * kbase_phy_alloc_mapping_get() & kbase_phy_alloc_mapping_put() pair - * should be used around access to the kernel-side CPU mapping so that - * mapping doesn't disappear whilst it is being accessed. - */ - struct kbase_vmap_struct *permanent_map; - - unsigned long properties; - - /* member in union valid based on @a type */ - union { -#if defined(CONFIG_DMA_SHARED_BUFFER) - struct { - struct dma_buf *dma_buf; - struct dma_buf_attachment *dma_attachment; - unsigned int current_mapping_usage_count; - struct sg_table *sgt; - } umm; -#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ - struct { - u64 stride; - size_t nents; - struct kbase_aliased *aliased; - } alias; - struct { - struct kbase_context *kctx; - /* Number of pages in this structure, including *pages. - * Used for kernel memory tracking. 
- */ - size_t nr_struct_pages; - } native; - struct kbase_alloc_import_user_buf { - unsigned long address; - unsigned long size; - unsigned long nr_pages; - struct page **pages; - /* top bit (1<<31) of current_mapping_usage_count - * specifies that this import was pinned on import - * See PINNED_ON_IMPORT - */ - u32 current_mapping_usage_count; - struct mm_struct *mm; - dma_addr_t *dma_addrs; - } user_buf; - } imported; -}; - -/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is - * used to signify that a buffer was pinned when it was imported. Since the - * reference count is limited by the number of atoms that can be submitted at - * once there should be no danger of overflowing into this bit. - * Stealing the top bit also has the benefit that - * current_mapping_usage_count != 0 if and only if the buffer is mapped. - */ -#define PINNED_ON_IMPORT (1<<31) - -static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(alloc); - /* we only track mappings of NATIVE buffers */ - if (alloc->type == KBASE_MEM_TYPE_NATIVE) - atomic_inc(&alloc->gpu_mappings); -} - -static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(alloc); - /* we only track mappings of NATIVE buffers */ - if (alloc->type == KBASE_MEM_TYPE_NATIVE) - if (0 > atomic_dec_return(&alloc->gpu_mappings)) { - pr_err("Mismatched %s:\n", __func__); - dump_stack(); - } -} - -/** - * kbase_mem_is_imported - Indicate whether a memory type is imported - * - * @type: the memory type - * - * Return: true if the memory type is imported, false otherwise - */ -static inline bool kbase_mem_is_imported(enum kbase_memory_type type) -{ - return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || - (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); -} - -void kbase_mem_kref_free(struct kref *kref); - -int kbase_mem_init(struct kbase_device *kbdev); -void kbase_mem_halt(struct kbase_device *kbdev); -void kbase_mem_term(struct kbase_device *kbdev); - -static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) -{ - kref_get(&alloc->kref); - return alloc; -} - -static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) -{ - kref_put(&alloc->kref, kbase_mem_kref_free); - return NULL; -} - -/** - * A GPU memory region, and attributes for CPU mappings. - */ -struct kbase_va_region { - struct rb_node rblink; - struct list_head link; - - struct rb_root *rbtree; /* Backlink to rb tree */ - - u64 start_pfn; /* The PFN in GPU space */ - size_t nr_pages; - /* Initial commit, for aligning the start address and correctly growing - * KBASE_REG_TILER_ALIGN_TOP regions */ - size_t initial_commit; - -/* Free region */ -#define KBASE_REG_FREE (1ul << 0) -/* CPU write access */ -#define KBASE_REG_CPU_WR (1ul << 1) -/* GPU write access */ -#define KBASE_REG_GPU_WR (1ul << 2) -/* No eXecute flag */ -#define KBASE_REG_GPU_NX (1ul << 3) -/* Is CPU cached? */ -#define KBASE_REG_CPU_CACHED (1ul << 4) -/* Is GPU cached? - * Some components within the GPU might only be able to access memory that is - * GPU cacheable. Refer to the specific GPU implementation for more details. - */ -#define KBASE_REG_GPU_CACHED (1ul << 5) - -#define KBASE_REG_GROWABLE (1ul << 6) -/* Can grow on pf? 
*/ -#define KBASE_REG_PF_GROW (1ul << 7) - -/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ -#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) - -/* inner shareable coherency */ -#define KBASE_REG_SHARE_IN (1ul << 9) -/* inner & outer shareable coherency */ -#define KBASE_REG_SHARE_BOTH (1ul << 10) - -/* Space for 4 different zones */ -#define KBASE_REG_ZONE_MASK (3ul << 11) -#define KBASE_REG_ZONE(x) (((x) & 3) << 11) - -/* GPU read access */ -#define KBASE_REG_GPU_RD (1ul<<13) -/* CPU read access */ -#define KBASE_REG_CPU_RD (1ul<<14) - -/* Index of chosen MEMATTR for this region (0..7) */ -#define KBASE_REG_MEMATTR_MASK (7ul << 16) -#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) -#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) - -#define KBASE_REG_SECURE (1ul << 19) - -#define KBASE_REG_DONT_NEED (1ul << 20) - -/* Imported buffer is padded? */ -#define KBASE_REG_IMPORT_PAD (1ul << 21) - -/* Bit 22 is reserved. - * - * Do not remove, use the next unreserved bit for new flags */ -#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) - -/* The top of the initial commit is aligned to extent pages. - * Extent must be a power of 2 */ -#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) - -/* Memory is handled by JIT - user space should not be able to free it */ -#define KBASE_REG_JIT (1ul << 24) - -/* Memory has permanent kernel side mapping */ -#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) - -#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) - -/* only used with 32-bit clients */ -/* - * On a 32bit platform, custom VA should be wired from 4GB - * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface - * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). - * So we put the default limit to the maximum possible on Linux and shrink - * it down, if required by the GPU, during initialization. - */ - -#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) -#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -/* end 32-bit clients only */ - -/* The starting address and size of the GPU-executable zone are dynamic - * and depend on the platform and the number of pages requested by the - * user process, with an upper limit of 4 GB. 
- */ -#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) -#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ - - - unsigned long flags; - - size_t extent; /* nr of pages alloc'd on PF */ - - struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */ - struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */ - - /* List head used to store the region in the JIT allocation pool */ - struct list_head jit_node; - /* The last JIT usage ID for this region */ - u16 jit_usage_id; - /* The JIT bin this allocation came from */ - u8 jit_bin_id; -}; - -/* Common functions */ -static inline struct tagged_addr *kbase_get_cpu_phy_pages( - struct kbase_va_region *reg) -{ - KBASE_DEBUG_ASSERT(reg); - KBASE_DEBUG_ASSERT(reg->cpu_alloc); - KBASE_DEBUG_ASSERT(reg->gpu_alloc); - KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); - - return reg->cpu_alloc->pages; -} - -static inline struct tagged_addr *kbase_get_gpu_phy_pages( - struct kbase_va_region *reg) -{ - KBASE_DEBUG_ASSERT(reg); - KBASE_DEBUG_ASSERT(reg->cpu_alloc); - KBASE_DEBUG_ASSERT(reg->gpu_alloc); - KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); - - return reg->gpu_alloc->pages; -} - -static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) -{ - KBASE_DEBUG_ASSERT(reg); - /* if no alloc object the backed size naturally is 0 */ - if (!reg->cpu_alloc) - return 0; - - KBASE_DEBUG_ASSERT(reg->cpu_alloc); - KBASE_DEBUG_ASSERT(reg->gpu_alloc); - KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); - - return reg->cpu_alloc->nents; -} - -#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ - -static inline struct kbase_mem_phy_alloc *kbase_alloc_create( - struct kbase_context *kctx, size_t nr_pages, - enum kbase_memory_type type) -{ - struct kbase_mem_phy_alloc *alloc; - size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; - size_t per_page_size = sizeof(*alloc->pages); - - /* Imported pages may have page private data already in use */ - if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - alloc_size += nr_pages * - sizeof(*alloc->imported.user_buf.dma_addrs); - per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); - } - - /* - * Prevent nr_pages*per_page_size + sizeof(*alloc) from - * wrapping around. 
- */ - if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) - / per_page_size)) - return ERR_PTR(-ENOMEM); - - /* Allocate based on the size to reduce internal fragmentation of vmem */ - if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) - alloc = vzalloc(alloc_size); - else - alloc = kzalloc(alloc_size, GFP_KERNEL); - - if (!alloc) - return ERR_PTR(-ENOMEM); - - if (type == KBASE_MEM_TYPE_NATIVE) { - alloc->imported.native.nr_struct_pages = - (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - kbase_process_page_usage_inc(kctx, - alloc->imported.native.nr_struct_pages); - } - - /* Store allocation method */ - if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) - alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; - - kref_init(&alloc->kref); - atomic_set(&alloc->gpu_mappings, 0); - alloc->nents = 0; - alloc->pages = (void *)(alloc + 1); - INIT_LIST_HEAD(&alloc->mappings); - alloc->type = type; - - if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) - alloc->imported.user_buf.dma_addrs = - (void *) (alloc->pages + nr_pages); - - return alloc; -} - -static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, - struct kbase_context *kctx) -{ - KBASE_DEBUG_ASSERT(reg); - KBASE_DEBUG_ASSERT(!reg->cpu_alloc); - KBASE_DEBUG_ASSERT(!reg->gpu_alloc); - KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); - - reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, - KBASE_MEM_TYPE_NATIVE); - if (IS_ERR(reg->cpu_alloc)) - return PTR_ERR(reg->cpu_alloc); - else if (!reg->cpu_alloc) - return -ENOMEM; - - reg->cpu_alloc->imported.native.kctx = kctx; - if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) - && (reg->flags & KBASE_REG_CPU_CACHED)) { - reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, - KBASE_MEM_TYPE_NATIVE); - if (IS_ERR_OR_NULL(reg->gpu_alloc)) { - kbase_mem_phy_alloc_put(reg->cpu_alloc); - return -ENOMEM; - } - reg->gpu_alloc->imported.native.kctx = kctx; - } else { - reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - } - - mutex_lock(&kctx->jit_evict_lock); - INIT_LIST_HEAD(®->cpu_alloc->evict_node); - INIT_LIST_HEAD(®->gpu_alloc->evict_node); - mutex_unlock(&kctx->jit_evict_lock); - - reg->flags &= ~KBASE_REG_FREE; - - return 0; -} - -static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages) -{ - int new_val = atomic_add_return(num_pages, used_pages); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_total_alloc_pages_change((long long int)new_val); -#endif - return new_val; -} - -static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages) -{ - int new_val = atomic_sub_return(num_pages, used_pages); -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_total_alloc_pages_change((long long int)new_val); -#endif - return new_val; -} - -/* - * Max size for kbdev memory pool (in pages) - */ -#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) - -/* - * Max size for kctx memory pool (in pages) - */ -#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) - -/* - * The order required for a 2MB page allocation (2^order * 4KB = 2MB) - */ -#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER 9 - -/* - * The order required for a 4KB page allocation - */ -#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 - -/** - * kbase_mem_pool_init - Create a memory pool for a kbase device - * @pool: Memory pool to initialize - * @max_size: Maximum number of free pages the pool can hold - * @order: Page order for physical page size (order=0=>4kB, order=9=>2MB) - * @kbdev: Kbase device where memory is used - * @next_pool: Pointer to the next pool or 
NULL. - * - * Allocations from @pool are in whole pages. Each @pool has a free list where - * pages can be quickly allocated from. The free list is initially empty and - * filled whenever pages are freed back to the pool. The number of free pages - * in the pool will in general not exceed @max_size, but the pool may in - * certain corner cases grow above @max_size. - * - * If @next_pool is not NULL, we will allocate from @next_pool before going to - * the kernel allocator. Similarily pages can spill over to @next_pool when - * @pool is full. Pages are zeroed before they spill over to another pool, to - * prevent leaking information between applications. - * - * A shrinker is registered so that Linux mm can reclaim pages from the pool as - * needed. - * - * Return: 0 on success, negative -errno on error - */ -int kbase_mem_pool_init(struct kbase_mem_pool *pool, - size_t max_size, - size_t order, - struct kbase_device *kbdev, - struct kbase_mem_pool *next_pool); - -/** - * kbase_mem_pool_term - Destroy a memory pool - * @pool: Memory pool to destroy - * - * Pages in the pool will spill over to @next_pool (if available) or freed to - * the kernel. - */ -void kbase_mem_pool_term(struct kbase_mem_pool *pool); - -/** - * kbase_mem_pool_alloc - Allocate a page from memory pool - * @pool: Memory pool to allocate from - * - * Allocations from the pool are made as follows: - * 1. If there are free pages in the pool, allocate a page from @pool. - * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page - * from @next_pool. - * 3. Return NULL if no memory in the pool - * - * Return: Pointer to allocated page, or NULL if allocation failed. - * - * Note : This function should not be used if the pool lock is held. Use - * kbase_mem_pool_alloc_locked() instead. - */ -struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); - -/** - * kbase_mem_pool_alloc_locked - Allocate a page from memory pool - * @pool: Memory pool to allocate from - * - * If there are free pages in the pool, this function allocates a page from - * @pool. This function does not use @next_pool. - * - * Return: Pointer to allocated page, or NULL if allocation failed. - * - * Note : Caller must hold the pool lock. - */ -struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); - -/** - * kbase_mem_pool_free - Free a page to memory pool - * @pool: Memory pool where page should be freed - * @page: Page to free to the pool - * @dirty: Whether some of the page may be dirty in the cache. - * - * Pages are freed to the pool as follows: - * 1. If @pool is not full, add @page to @pool. - * 2. Otherwise, if @next_pool is not NULL and not full, add @page to - * @next_pool. - * 3. Finally, free @page to the kernel. - * - * Note : This function should not be used if the pool lock is held. Use - * kbase_mem_pool_free_locked() instead. - */ -void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, - bool dirty); - -/** - * kbase_mem_pool_free_locked - Free a page to memory pool - * @pool: Memory pool where page should be freed - * @p: Page to free to the pool - * @dirty: Whether some of the page may be dirty in the cache. - * - * If @pool is not full, this function adds @page to @pool. Otherwise, @page is - * freed to the kernel. This function does not use @next_pool. - * - * Note : Caller must hold the pool lock. 
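The pool interface documented above (init with an optional next_pool, alloc, free, term) is small enough to show end to end. The following is only an illustrative sketch, assuming the in-tree mali_kbase.h header; demo_pool_roundtrip and its parameters are placeholder names, not symbols from this driver.

#include "mali_kbase.h"	/* kbase_mem_pool_*() declarations */

/*
 * Create a 4kB-page pool that falls back to (and spills over into)
 * next_pool, take one page from it and give it back.
 */
static int demo_pool_roundtrip(struct kbase_device *kbdev,
			       struct kbase_mem_pool *pool,
			       struct kbase_mem_pool *next_pool)
{
	struct page *p;
	int err;

	/* order 0 => 4kB pages; next_pool may be NULL for a stand-alone pool */
	err = kbase_mem_pool_init(pool, KBASE_MEM_POOL_MAX_SIZE_KCTX,
				  KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
				  kbdev, next_pool);
	if (err)
		return err;

	/* Taken from pool first, then next_pool; NULL if both are empty */
	p = kbase_mem_pool_alloc(pool);
	if (p)
		kbase_mem_pool_free(pool, p, false /* page is not dirty */);

	/* Remaining free pages spill to next_pool (if any) or go back to
	 * the kernel */
	kbase_mem_pool_term(pool);
	return 0;
}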
- */ -void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, - bool dirty); - -/** - * kbase_mem_pool_alloc_pages - Allocate pages from memory pool - * @pool: Memory pool to allocate from - * @nr_pages: Number of pages to allocate - * @pages: Pointer to array where the physical address of the allocated - * pages will be stored. - * @partial_allowed: If fewer pages allocated is allowed - * - * Like kbase_mem_pool_alloc() but optimized for allocating many pages. - * - * Return: - * On success number of pages allocated (could be less than nr_pages if - * partial_allowed). - * On error an error code. - * - * Note : This function should not be used if the pool lock is held. Use - * kbase_mem_pool_alloc_pages_locked() instead. - * - * The caller must not hold vm_lock, as this could cause a deadlock if - * the kernel OoM killer runs. If the caller must allocate pages while holding - * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. - */ -int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, - struct tagged_addr *pages, bool partial_allowed); - -/** - * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool - * @pool: Memory pool to allocate from - * @nr_4k_pages: Number of pages to allocate - * @pages: Pointer to array where the physical address of the allocated - * pages will be stored. - * - * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This - * version does not allocate new pages from the kernel, and therefore will never - * trigger the OoM killer. Therefore, it can be run while the vm_lock is held. - * - * As new pages can not be allocated, the caller must ensure there are - * sufficient pages in the pool. Usage of this function should look like : - * - * kbase_gpu_vm_lock(kctx); - * kbase_mem_pool_lock(pool) - * while (kbase_mem_pool_size(pool) < pages_required) { - * kbase_mem_pool_unlock(pool) - * kbase_gpu_vm_unlock(kctx); - * kbase_mem_pool_grow(pool) - * kbase_gpu_vm_lock(kctx); - * kbase_mem_pool_lock(pool) - * } - * kbase_mem_pool_alloc_pages_locked(pool) - * kbase_mem_pool_unlock(pool) - * Perform other processing that requires vm_lock... - * kbase_gpu_vm_unlock(kctx); - * - * This ensures that the pool can be grown to the required size and that the - * allocation can complete without another thread using the newly grown pages. - * - * Return: - * On success number of pages allocated. - * On error an error code. - * - * Note : Caller must hold the pool lock. - */ -int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, - size_t nr_4k_pages, struct tagged_addr *pages); - -/** - * kbase_mem_pool_free_pages - Free pages to memory pool - * @pool: Memory pool where pages should be freed - * @nr_pages: Number of pages to free - * @pages: Pointer to array holding the physical addresses of the pages to - * free. - * @dirty: Whether any pages may be dirty in the cache. - * @reclaimed: Whether the pages where reclaimable and thus should bypass - * the pool and go straight to the kernel. - * - * Like kbase_mem_pool_free() but optimized for freeing many pages. - */ -void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - struct tagged_addr *pages, bool dirty, bool reclaimed); - -/** - * kbase_mem_pool_free_pages_locked - Free pages to memory pool - * @pool: Memory pool where pages should be freed - * @nr_pages: Number of pages to free - * @pages: Pointer to array holding the physical addresses of the pages to - * free. 
- * @dirty: Whether any pages may be dirty in the cache. - * @reclaimed: Whether the pages where reclaimable and thus should bypass - * the pool and go straight to the kernel. - * - * Like kbase_mem_pool_free() but optimized for freeing many pages. - */ -void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, bool dirty, - bool reclaimed); - -/** - * kbase_mem_pool_size - Get number of free pages in memory pool - * @pool: Memory pool to inspect - * - * Note: the size of the pool may in certain corner cases exceed @max_size! - * - * Return: Number of free pages in the pool - */ -static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) -{ - return READ_ONCE(pool->cur_size); -} - -/** - * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool - * @pool: Memory pool to inspect - * - * Return: Maximum number of free pages in the pool - */ -static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) -{ - return pool->max_size; -} - - -/** - * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool - * @pool: Memory pool to inspect - * @max_size: Maximum number of free pages the pool can hold - * - * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. - * For details see kbase_mem_pool_shrink(). - */ -void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); - -/** - * kbase_mem_pool_grow - Grow the pool - * @pool: Memory pool to grow - * @nr_to_grow: Number of pages to add to the pool - * - * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to - * become larger than the maximum size specified. - * - * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages - */ -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); - -/** - * kbase_mem_pool_trim - Grow or shrink the pool to a new size - * @pool: Memory pool to trim - * @new_size: New number of pages in the pool - * - * If @new_size > @cur_size, fill the pool with new pages from the kernel, but - * not above the max_size for the pool. - * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. - */ -void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); - -/** - * kbase_mem_pool_mark_dying - Mark that this pool is dying - * @pool: Memory pool - * - * This will cause any ongoing allocation operations (eg growing on page fault) - * to be terminated. - */ -void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); - -/** - * kbase_mem_alloc_page - Allocate a new page for a device - * @pool: Memory pool to allocate a page from - * - * Most uses should use kbase_mem_pool_alloc to allocate a page. However that - * function can fail in the event the pool is empty. - * - * Return: A new page or NULL if no memory - */ -struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); - -/** - * kbase_region_tracker_init - Initialize the region tracker data structure - * @kctx: kbase context - * - * Return: 0 if success, negative error code otherwise. - */ -int kbase_region_tracker_init(struct kbase_context *kctx); - -/** - * kbase_region_tracker_init_jit - Initialize the JIT region - * @kctx: kbase context - * @jit_va_pages: Size of the JIT region in pages - * @max_allocations: Maximum number of allocations allowed for the JIT region - * @trim_level: Trim level for the JIT region - * - * Return: 0 if success, negative error code otherwise. 
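The locked allocation path has a specific calling pattern, spelled out in the kbase_mem_pool_alloc_pages_locked() comment above: grow the pool with both locks dropped, then allocate once the pool is known to hold enough pages. A hedged sketch of that pattern follows, assuming mali_kbase.h; demo_alloc_pages_under_vm_lock is a placeholder name and the error handling is simplified.

#include "mali_kbase.h"

/*
 * Allocate nr_pages physical pages while holding the vm_lock. The pool is
 * grown with both locks released (growing allocates from the kernel and may
 * invoke the OoM killer), then the size is re-checked under the locks.
 */
static int demo_alloc_pages_under_vm_lock(struct kbase_context *kctx,
					  struct kbase_mem_pool *pool,
					  size_t nr_pages,
					  struct tagged_addr *pages)
{
	int ret;

	kbase_gpu_vm_lock(kctx);
	kbase_mem_pool_lock(pool);

	while (kbase_mem_pool_size(pool) < nr_pages) {
		size_t shortfall = nr_pages - kbase_mem_pool_size(pool);

		/* Drop both locks before growing: kbase_mem_pool_grow()
		 * must not run under vm_lock. */
		kbase_mem_pool_unlock(pool);
		kbase_gpu_vm_unlock(kctx);

		ret = kbase_mem_pool_grow(pool, shortfall);
		if (ret)
			return ret;	/* -ENOMEM */

		kbase_gpu_vm_lock(kctx);
		kbase_mem_pool_lock(pool);
	}

	/* Enough pages are now in the pool, so this cannot fall back to the
	 * kernel allocator. */
	ret = kbase_mem_pool_alloc_pages_locked(pool, nr_pages, pages);

	kbase_mem_pool_unlock(pool);
	/* ... do the vm_lock-protected work with the pages here ... */
	kbase_gpu_vm_unlock(kctx);

	return ret < 0 ? ret : 0;
}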
- */ -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, - u8 max_allocations, u8 trim_level); - -/** - * kbase_region_tracker_init_exec - Initialize the EXEC_VA region - * @kctx: kbase context - * @exec_va_pages: Size of the JIT region in pages. - * It must not be greater than 4 GB. - * - * Return: 0 if success, negative error code otherwise. - */ -int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); - -/** - * kbase_region_tracker_term - Terminate the JIT region - * @kctx: kbase context - */ -void kbase_region_tracker_term(struct kbase_context *kctx); - -/** - * kbase_region_tracker_term_rbtree - Free memory for a region tracker - * - * This will free all the regions within the region tracker - * - * @rbtree: Region tracker tree root - */ -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); - -struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( - struct kbase_context *kctx, u64 gpu_addr); -struct kbase_va_region *kbase_find_region_enclosing_address( - struct rb_root *rbtree, u64 gpu_addr); - -/** - * @brief Check that a pointer is actually a valid region. - * - * Must be called with context lock held. - */ -struct kbase_va_region *kbase_region_tracker_find_region_base_address( - struct kbase_context *kctx, u64 gpu_addr); -struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, - u64 gpu_addr); - -struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone); -void kbase_free_alloced_region(struct kbase_va_region *reg); -int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, - u64 addr, size_t nr_pages, size_t align); -int kbase_add_va_region_rbtree(struct kbase_device *kbdev, - struct kbase_va_region *reg, u64 addr, size_t nr_pages, - size_t align); -int kbase_remove_va_region(struct kbase_va_region *reg); - -bool kbase_check_alloc_flags(unsigned long flags); -bool kbase_check_import_flags(unsigned long flags); - -/** - * kbase_check_alloc_sizes - check user space sizes parameters for an - * allocation - * - * @kctx: kbase context - * @flags: The flags passed from user space - * @va_pages: The size of the requested region, in pages. - * @commit_pages: Number of pages to commit initially. - * @extent: Number of pages to grow by on GPU page fault and/or alignment - * (depending on flags) - * - * Makes checks on the size parameters passed in from user space for a memory - * allocation call, with respect to the flags requested. - * - * Return: 0 if sizes are valid for these flags, negative error code otherwise - */ -int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 extent); - -/** - * kbase_update_region_flags - Convert user space flags to kernel region flags - * - * @kctx: kbase context - * @reg: The region to update the flags on - * @flags: The flags passed from user space - * - * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and - * this function will fail if the system does not support system coherency. 
- * - * Return: 0 if successful, -EINVAL if the flags are not supported - */ -int kbase_update_region_flags(struct kbase_context *kctx, - struct kbase_va_region *reg, unsigned long flags); - -void kbase_gpu_vm_lock(struct kbase_context *kctx); -void kbase_gpu_vm_unlock(struct kbase_context *kctx); - -int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); - -/** - * kbase_mmu_init - Initialise an object representing GPU page tables - * - * The structure should be terminated using kbase_mmu_term() - * - * @kbdev: kbase device - * @mmut: structure to initialise - * @kctx: optional kbase context, may be NULL if this set of MMU tables is not - * associated with a context - */ -int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - struct kbase_context *kctx); -/** - * kbase_mmu_term - Terminate an object representing GPU page tables - * - * This will free any page tables that have been allocated - * - * @kbdev: kbase device - * @mmut: kbase_mmu_table to be destroyed - */ -void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); - -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - const u64 start_vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags); - -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - size_t nr, int as_nr); -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags); - -/** - * @brief Register region and map it on the GPU. - * - * Call kbase_add_va_region() and map the region on the GPU. - */ -int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); - -/** - * @brief Remove the region from the GPU and unregister it. - * - * Must be called with context lock held. - */ -int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); - -/** - * kbase_mmu_update - Configure an address space on the GPU to the specified - * MMU tables - * - * The caller has the following locking conditions: - * - It must hold kbase_device->mmu_hw_mutex - * - It must hold the hwaccess_lock - * - * @kbdev: Kbase device structure - * @mmut: The set of MMU tables to be configured on the address space - * @as_nr: The address space to be configured - */ -void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - int as_nr); - -/** - * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. - * @kctx: Kbase context - * - * Disable and perform the required cache maintenance to remove the all - * data from provided kbase context from the GPU caches. - * - * The caller has the following locking conditions: - * - It must hold kbase_device->mmu_hw_mutex - * - It must hold the hwaccess_lock - */ -void kbase_mmu_disable(struct kbase_context *kctx); - -/** - * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified - * address space. - * @kbdev: Kbase device - * @as_nr: The address space number to set to unmapped. 
- * - * This function must only be called during reset/power-up and it used to - * ensure the registers are in a known state. - * - * The caller must hold kbdev->mmu_hw_mutex. - */ -void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); - -void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); - -/** Dump the MMU tables to a buffer - * - * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the - * buffer is too small then the return value will be NULL. - * - * The GPU vm lock must be held when calling this function. - * - * The buffer returned should be freed with @ref vfree when it is no longer required. - * - * @param[in] kctx The kbase context to dump - * @param[in] nr_pages The number of pages to allocate for the buffer. - * - * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too - * small) - */ -void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); - -/** - * kbase_sync_now - Perform cache maintenance on a memory region - * - * @kctx: The kbase context of the region - * @sset: A syncset structure describing the region and direction of the - * synchronisation required - * - * Return: 0 on success or error code - */ -int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); -void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, - struct tagged_addr gpu_pa, off_t offset, size_t size, - enum kbase_sync_type sync_fn); -void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); -void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); - -/* OS specific functions */ -int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); -int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); -void kbase_os_mem_map_lock(struct kbase_context *kctx); -void kbase_os_mem_map_unlock(struct kbase_context *kctx); - -/** - * @brief Update the memory allocation counters for the current process - * - * OS specific call to updates the current memory allocation counters for the current process with - * the supplied delta. - * - * @param[in] kctx The kbase context - * @param[in] pages The desired delta to apply to the memory usage counters. - */ - -void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); - -/** - * @brief Add to the memory allocation counters for the current process - * - * OS specific call to add to the current memory allocation counters for the current process by - * the supplied amount. - * - * @param[in] kctx The kernel base context used for the allocation. - * @param[in] pages The desired delta to apply to the memory usage counters. - */ - -static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) -{ - kbasep_os_process_page_usage_update(kctx, pages); -} - -/** - * @brief Subtract from the memory allocation counters for the current process - * - * OS specific call to subtract from the current memory allocation counters for the current process by - * the supplied amount. - * - * @param[in] kctx The kernel base context used for the allocation. - * @param[in] pages The desired delta to apply to the memory usage counters. 
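The kbase_mmu_dump() contract described above (vm lock held across the call, NULL on failure or if the buffer is too small, buffer released with vfree) can be shown in a few lines. This is a sketch only, assuming mali_kbase.h; demo_dump_mmu and the 16-page buffer size are arbitrary placeholders.

#include <linux/vmalloc.h>	/* vfree() */
#include "mali_kbase.h"

/* Dump the context's MMU tables into a temporary buffer and release it. */
static void demo_dump_mmu(struct kbase_context *kctx)
{
	void *dump;

	kbase_gpu_vm_lock(kctx);
	dump = kbase_mmu_dump(kctx, 16 /* buffer size in pages */);
	kbase_gpu_vm_unlock(kctx);

	if (!dump)
		return;	/* allocation failed or 16 pages was too small */

	/* ... parse or copy the dump here ... */

	vfree(dump);
}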
- */ - -static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) -{ - kbasep_os_process_page_usage_update(kctx, 0 - pages); -} - -/** - * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU - * mapping of a memory allocation containing a given address range - * - * Searches for a CPU mapping of any part of any region that fully encloses the - * CPU virtual address range specified by @uaddr and @size. Returns a failure - * indication if only part of the address range lies within a CPU mapping. - * - * @kctx: The kernel base context used for the allocation. - * @uaddr: Start of the CPU virtual address range. - * @size: Size of the CPU virtual address range (in bytes). - * @offset: The offset from the start of the allocation to the specified CPU - * virtual address. - * - * Return: 0 if offset was obtained successfully. Error code otherwise. - */ -int kbasep_find_enclosing_cpu_mapping_offset( - struct kbase_context *kctx, - unsigned long uaddr, size_t size, u64 *offset); - -/** - * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of - * the start of GPU virtual memory region which encloses @gpu_addr for the - * @size length in bytes - * - * Searches for the memory region in GPU virtual memory space which contains - * the region defined by the @gpu_addr and @size, where @gpu_addr is the - * beginning and @size the length in bytes of the provided region. If found, - * the location of the start address of the GPU virtual memory region is - * passed in @start pointer and the location of the offset of the region into - * the GPU virtual memory region is passed in @offset pointer. - * - * @kctx: The kernel base context within which the memory is searched. - * @gpu_addr: GPU virtual address for which the region is sought; defines - * the beginning of the provided region. - * @size: The length (in bytes) of the provided region for which the - * GPU virtual memory region is sought. - * @start: Pointer to the location where the address of the start of - * the found GPU virtual memory region is. - * @offset: Pointer to the location where the offset of @gpu_addr into - * the found GPU virtual memory region is. - */ -int kbasep_find_enclosing_gpu_mapping_start_and_offset( - struct kbase_context *kctx, - u64 gpu_addr, size_t size, u64 *start, u64 *offset); - -enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer); -void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); -void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); - -/** - * kbase_alloc_phy_pages_helper - Allocates physical pages. - * @alloc: allocation object to add pages to - * @nr_pages_requested: number of physical pages to allocate - * - * Allocates \a nr_pages_requested and updates the alloc object. - * - * Return: 0 if all pages have been successfully allocated. Error code otherwise - * - * Note : The caller must not hold vm_lock, as this could cause a deadlock if - * the kernel OoM killer runs. If the caller must allocate pages while holding - * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. - * - * This function cannot be used from interrupt context - */ -int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, - size_t nr_pages_requested); - -/** - * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. 
- * @alloc: allocation object to add pages to - * @pool: Memory pool to allocate from - * @nr_pages_requested: number of physical pages to allocate - * @prealloc_sa: Information about the partial allocation if the amount - * of memory requested is not a multiple of 2MB. One - * instance of struct kbase_sub_alloc must be allocated by - * the caller iff CONFIG_MALI_2MB_ALLOC is enabled. - * - * Allocates \a nr_pages_requested and updates the alloc object. This function - * does not allocate new pages from the kernel, and therefore will never trigger - * the OoM killer. Therefore, it can be run while the vm_lock is held. - * - * As new pages can not be allocated, the caller must ensure there are - * sufficient pages in the pool. Usage of this function should look like : - * - * kbase_gpu_vm_lock(kctx); - * kbase_mem_pool_lock(pool) - * while (kbase_mem_pool_size(pool) < pages_required) { - * kbase_mem_pool_unlock(pool) - * kbase_gpu_vm_unlock(kctx); - * kbase_mem_pool_grow(pool) - * kbase_gpu_vm_lock(kctx); - * kbase_mem_pool_lock(pool) - * } - * kbase_alloc_phy_pages_helper_locked(pool) - * kbase_mem_pool_unlock(pool) - * Perform other processing that requires vm_lock... - * kbase_gpu_vm_unlock(kctx); - * - * This ensures that the pool can be grown to the required size and that the - * allocation can complete without another thread using the newly grown pages. - * - * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then - * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be - * alloc->imported.native.kctx->mem_pool. - * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be - * pre-allocated because we must not sleep (due to the usage of kmalloc()) - * whilst holding pool->pool_lock. - * @prealloc_sa shall be set to NULL if it has been consumed by this function - * to indicate that the caller must not free it. - * - * Return: Pointer to array of allocated pages. NULL on failure. - * - * Note : Caller must hold pool->pool_lock - */ -struct tagged_addr *kbase_alloc_phy_pages_helper_locked( - struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, - size_t nr_pages_requested, - struct kbase_sub_alloc **prealloc_sa); - -/** -* @brief Free physical pages. -* -* Frees \a nr_pages and updates the alloc object. -* -* @param[in] alloc allocation object to free pages from -* @param[in] nr_pages_to_free number of physical pages to free -*/ -int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); - -/** - * kbase_free_phy_pages_helper_locked - Free pages allocated with - * kbase_alloc_phy_pages_helper_locked() - * @alloc: Allocation object to free pages from - * @pool: Memory pool to return freed pages to - * @pages: Pages allocated by kbase_alloc_phy_pages_helper_locked() - * @nr_pages_to_free: Number of physical pages to free - * - * This function atomically frees pages allocated with - * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page - * array that is returned by that function. @pool must be the pool that the - * pages were originally allocated from. - * - * If the mem_pool has been unlocked since the allocation then - * kbase_free_phy_pages_helper() should be used instead. 
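The unlocked helper pair can be used to grow and shrink the physical backing of an allocation symmetrically. A minimal sketch, assuming mali_kbase.h; demo_backing_grow_shrink is a placeholder name and real callers would keep the pages rather than freeing them straight away.

#include "mali_kbase.h"

/*
 * Grow the physical backing of an allocation by delta_pages and then shrink
 * it again. Per the notes above, kbase_alloc_phy_pages_helper() must not be
 * called with vm_lock held, since it may allocate from the kernel and
 * trigger the OoM killer.
 */
static int demo_backing_grow_shrink(struct kbase_mem_phy_alloc *alloc,
				    size_t delta_pages)
{
	int err;

	err = kbase_alloc_phy_pages_helper(alloc, delta_pages);
	if (err)
		return err;

	/* alloc->nents has grown by delta_pages; undo it again */
	return kbase_free_phy_pages_helper(alloc, delta_pages);
}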
- */ -void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, - struct kbase_mem_pool *pool, struct tagged_addr *pages, - size_t nr_pages_to_free); - -static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) -{ - SetPagePrivate(p); - if (sizeof(dma_addr_t) > sizeof(p->private)) { - /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the - * private field stays the same. So we have to be clever and - * use the fact that we only store DMA addresses of whole pages, - * so the low bits should be zero */ - KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); - set_page_private(p, dma_addr >> PAGE_SHIFT); - } else { - set_page_private(p, dma_addr); - } -} - -static inline dma_addr_t kbase_dma_addr(struct page *p) -{ - if (sizeof(dma_addr_t) > sizeof(p->private)) - return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; - - return (dma_addr_t)page_private(p); -} - -static inline void kbase_clear_dma_addr(struct page *p) -{ - ClearPagePrivate(p); -} - -/** - * kbase_mmu_interrupt_process - Process a bus or page fault. - * @kbdev The kbase_device the fault happened on - * @kctx The kbase_context for the faulting address space if one was found. - * @as The address space that has the fault - * @fault Data relating to the fault - * - * This function will process a fault on a specific address space - */ -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault); - -/** - * @brief Process a page fault. - * - * @param[in] data work_struct passed by queue_work() - */ -void page_fault_worker(struct work_struct *data); - -/** - * @brief Process a bus fault. - * - * @param[in] data work_struct passed by queue_work() - */ -void bus_fault_worker(struct work_struct *data); - -/** - * @brief Flush MMU workqueues. - * - * This function will cause any outstanding page or bus faults to be processed. - * It should be called prior to powering off the GPU. - * - * @param[in] kbdev Device pointer - */ -void kbase_flush_mmu_wqs(struct kbase_device *kbdev); - -/** - * kbase_sync_single_for_device - update physical memory and give GPU ownership - * @kbdev: Device pointer - * @handle: DMA address of region - * @size: Size of region to sync - * @dir: DMA data direction - */ - -void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir); - -/** - * kbase_sync_single_for_cpu - update physical memory and give CPU ownership - * @kbdev: Device pointer - * @handle: DMA address of region - * @size: Size of region to sync - * @dir: DMA data direction - */ - -void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir); - -#ifdef CONFIG_DEBUG_FS -/** - * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. - * @kctx: kbase context - */ -void kbase_jit_debugfs_init(struct kbase_context *kctx); -#endif /* CONFIG_DEBUG_FS */ - -/** - * kbase_jit_init - Initialize the JIT memory pool management - * @kctx: kbase context - * - * Returns zero on success or negative error number on failure. - */ -int kbase_jit_init(struct kbase_context *kctx); - -/** - * kbase_jit_allocate - Allocate JIT memory - * @kctx: kbase context - * @info: JIT allocation information - * - * Return: JIT allocation on success or NULL on failure. 
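A small sketch of how the kbase_set_dma_addr()/kbase_dma_addr() helpers above are intended to be used, assuming mali_kbase.h and the standard DMA mapping API; demo_track_dma_addr is a placeholder and the DMA_TO_DEVICE sync is only an example of a later cache maintenance operation.

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include "mali_kbase.h"

/*
 * Round-trip a page's DMA address through page->private. On 32-bit ARM with
 * LPAE, dma_addr_t is wider than the private field, so kbase_set_dma_addr()
 * stores the address right-shifted by PAGE_SHIFT (the low bits are zero for
 * page-sized, page-aligned mappings) and kbase_dma_addr() shifts it back.
 */
static int demo_track_dma_addr(struct kbase_device *kbdev, struct page *p)
{
	dma_addr_t dma = dma_map_page(kbdev->dev, p, 0, PAGE_SIZE,
				      DMA_BIDIRECTIONAL);

	if (dma_mapping_error(kbdev->dev, dma))
		return -ENOMEM;

	kbase_set_dma_addr(p, dma);

	/* Later users recover the handle from the page itself, e.g. for
	 * cache maintenance before the GPU reads the page. */
	kbase_sync_single_for_device(kbdev, kbase_dma_addr(p), PAGE_SIZE,
				     DMA_TO_DEVICE);

	dma_unmap_page(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE,
		       DMA_BIDIRECTIONAL);
	kbase_clear_dma_addr(p);
	return 0;
}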
- */ -struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - struct base_jit_alloc_info *info); - -/** - * kbase_jit_free - Free a JIT allocation - * @kctx: kbase context - * @reg: JIT allocation - * - * Frees a JIT allocation and places it into the free pool for later reuse. - */ -void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); - -/** - * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing - * @reg: JIT allocation - */ -void kbase_jit_backing_lost(struct kbase_va_region *reg); - -/** - * kbase_jit_evict - Evict a JIT allocation from the pool - * @kctx: kbase context - * - * Evict the least recently used JIT allocation from the pool. This can be - * required if normal VA allocations are failing due to VA exhaustion. - * - * Return: True if a JIT allocation was freed, false otherwise. - */ -bool kbase_jit_evict(struct kbase_context *kctx); - -/** - * kbase_jit_term - Terminate the JIT memory pool management - * @kctx: kbase context - */ -void kbase_jit_term(struct kbase_context *kctx); - -/** - * kbase_has_exec_va_zone - EXEC_VA zone predicate - * - * Determine whether an EXEC_VA zone has been created for the GPU address space - * of the given kbase context. - * - * @kctx: kbase context - * - * Return: True if the kbase context has an EXEC_VA zone. - */ -bool kbase_has_exec_va_zone(struct kbase_context *kctx); - -/** - * kbase_map_external_resource - Map an external resource to the GPU. - * @kctx: kbase context. - * @reg: The region to map. - * @locked_mm: The mm_struct which has been locked for this operation. - * - * Return: The physical allocation which backs the region on success or NULL - * on failure. - */ -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm); - -/** - * kbase_unmap_external_resource - Unmap an external resource from the GPU. - * @kctx: kbase context. - * @reg: The region to unmap or NULL if it has already been released. - * @alloc: The physical allocation being unmapped. - */ -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); - -/** - * kbase_sticky_resource_init - Initialize sticky resource management. - * @kctx: kbase context - * - * Returns zero on success or negative error number on failure. - */ -int kbase_sticky_resource_init(struct kbase_context *kctx); - -/** - * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. - * @kctx: kbase context. - * @gpu_addr: The GPU address of the external resource. - * - * Return: The metadata object which represents the binding between the - * external resource and the kbase context on success or NULL on failure. - */ -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( - struct kbase_context *kctx, u64 gpu_addr); - -/** - * kbase_sticky_resource_release - Release a reference on a sticky resource. - * @kctx: kbase context. - * @meta: Binding metadata. - * @gpu_addr: GPU address of the external resource. - * - * If meta is NULL then gpu_addr will be used to scan the metadata list and - * find the matching metadata (if any), otherwise the provided meta will be - * used and gpu_addr will be ignored. - * - * Return: True if the release found the metadata and the reference was dropped. 
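The sticky resource helpers declared here pin an external resource on the GPU until it is explicitly released. The sketch below is illustrative only, assuming mali_kbase.h and that kbase_gpu_vm_lock() takes the region lock which the helpers assert; demo_sticky_pin is a placeholder name.

#include <linux/errno.h>
#include "mali_kbase.h"

/*
 * Pin an external resource for the lifetime of a longer operation, then
 * drop the reference again.
 */
static int demo_sticky_pin(struct kbase_context *kctx, u64 gpu_addr)
{
	struct kbase_ctx_ext_res_meta *meta;

	kbase_gpu_vm_lock(kctx);
	meta = kbase_sticky_resource_acquire(kctx, gpu_addr);
	kbase_gpu_vm_unlock(kctx);

	if (!meta)
		return -ENOENT;

	/* ... the imported memory stays mapped on the GPU here ... */

	kbase_gpu_vm_lock(kctx);
	/* With meta supplied, the gpu_addr argument is ignored */
	kbase_sticky_resource_release(kctx, meta, 0);
	kbase_gpu_vm_unlock(kctx);

	return 0;
}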
- */ -bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); - -/** - * kbase_sticky_resource_term - Terminate sticky resource management. - * @kctx: kbase context - */ -void kbase_sticky_resource_term(struct kbase_context *kctx); - -/** - * kbase_mem_pool_lock - Lock a memory pool - * @pool: Memory pool to lock - */ -static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) -{ - spin_lock(&pool->pool_lock); -} - -/** - * kbase_mem_pool_lock - Release a memory pool - * @pool: Memory pool to lock - */ -static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) -{ - spin_unlock(&pool->pool_lock); -} - -/** - * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. - * @alloc: The physical allocation - */ -void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); - - -#endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_linux.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_linux.c deleted file mode 100755 index c70112d275f0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_linux.c +++ /dev/null @@ -1,2487 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_mem_linux.c - * Base kernel memory APIs, Linux implementation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) -#include -#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ -#ifdef CONFIG_DMA_SHARED_BUFFER -#include -#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -#include -#include - -#include -#include -#include -#include - - -static int kbase_vmap_phy_pages(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 offset_bytes, size_t size, - struct kbase_vmap_struct *map); -static void kbase_vunmap_phy_pages(struct kbase_context *kctx, - struct kbase_vmap_struct *map); - -static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); - -/* Retrieve the associated region pointer if the GPU address corresponds to - * one of the event memory pages. The enclosing region, if found, shouldn't - * have been marked as free. - */ -static struct kbase_va_region *kbase_find_event_mem_region( - struct kbase_context *kctx, u64 gpu_addr) -{ - - return NULL; -} - -/** - * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping - * of the physical allocation belonging to a - * region - * @kctx: The kernel base context @reg belongs to. 
- * @reg: The region whose physical allocation is to be mapped - * @vsize: The size of the requested region, in pages - * @size: The size in pages initially committed to the region - * - * Return: 0 on success, otherwise an error code indicating failure - * - * Maps the physical allocation backing a non-free @reg, so it may be - * accessed directly from the kernel. This is only supported for physical - * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of - * physical allocation. - * - * The mapping is stored directly in the allocation that backs @reg. The - * refcount is not incremented at this point. Instead, use of the mapping should - * be surrounded by kbase_phy_alloc_mapping_get() and - * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the - * client is accessing it. - * - * Both cached and uncached regions are allowed, but any sync operations are the - * responsibility of the client using the permanent mapping. - * - * A number of checks are made to ensure that a region that needs a permanent - * mapping can actually be supported: - * - The region must be created as fully backed - * - The region must not be growable - * - * This function will fail if those checks are not satisfied. - * - * On success, the region will also be forced into a certain kind: - * - It will no longer be growable - */ -static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, - struct kbase_va_region *reg, size_t vsize, size_t size) -{ - size_t size_bytes = (size << PAGE_SHIFT); - struct kbase_vmap_struct *kern_mapping; - int err = 0; - - /* Can only map in regions that are always fully committed - * Don't setup the mapping twice - * Only support KBASE_MEM_TYPE_NATIVE allocations - */ - if (vsize != size || reg->cpu_alloc->permanent_map != NULL || - reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) - return -EINVAL; - - if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - - kctx->permanent_mapped_pages)) { - dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %lu pages", - (u64)size, - KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, - kctx->permanent_mapped_pages); - return -ENOMEM; - } - - kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); - if (!kern_mapping) - return -ENOMEM; - - err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping); - if (err < 0) - goto vmap_fail; - - /* No support for growing or shrinking mapped regions */ - reg->flags &= ~KBASE_REG_GROWABLE; - - reg->cpu_alloc->permanent_map = kern_mapping; - kctx->permanent_mapped_pages += size; - - return 0; -vmap_fail: - kfree(kern_mapping); - return err; -} - -void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc) -{ - WARN_ON(!alloc->permanent_map); - kbase_vunmap_phy_pages(kctx, alloc->permanent_map); - kfree(alloc->permanent_map); - - alloc->permanent_map = NULL; - - /* Mappings are only done on cpu_alloc, so don't need to worry about - * this being reduced a second time if a separate gpu_alloc is - * freed - */ - WARN_ON(alloc->nents > kctx->permanent_mapped_pages); - kctx->permanent_mapped_pages -= alloc->nents; -} - -void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, - u64 gpu_addr, - struct kbase_vmap_struct **out_kern_mapping) -{ - struct kbase_va_region *reg; - void *kern_mem_ptr = NULL; - struct kbase_vmap_struct *kern_mapping; - u64 mapping_offset; - - WARN_ON(!kctx); - WARN_ON(!out_kern_mapping); - - kbase_gpu_vm_lock(kctx); - - /* First do 
a quick lookup in the list of event memory regions */ - reg = kbase_find_event_mem_region(kctx, gpu_addr); - - if (!reg) { - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); - } - - if (reg == NULL || (reg->flags & KBASE_REG_FREE) != 0) - goto out_unlock; - - kern_mapping = reg->cpu_alloc->permanent_map; - if (kern_mapping == NULL) - goto out_unlock; - - mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT); - - /* Refcount the allocations to prevent them disappearing */ - WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc); - WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc); - (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc); - (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc); - - kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset); - *out_kern_mapping = kern_mapping; -out_unlock: - kbase_gpu_vm_unlock(kctx); - return kern_mem_ptr; -} - -void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, - struct kbase_vmap_struct *kern_mapping) -{ - WARN_ON(!kctx); - WARN_ON(!kern_mapping); - - WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx); - WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map); - - kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc); - kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc); - - /* kern_mapping and the gpu/cpu phy allocs backing it must not be used - * from now on - */ -} - -struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, - u64 *gpu_va) -{ - int zone; - struct kbase_va_region *reg; - struct rb_root *rbtree; - struct device *dev; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(flags); - KBASE_DEBUG_ASSERT(gpu_va); - - dev = kctx->kbdev->dev; - *gpu_va = 0; /* return 0 on failure */ - - if (!kbase_check_alloc_flags(*flags)) { - dev_warn(dev, - "kbase_mem_alloc called with bad flags (%llx)", - (unsigned long long)*flags); - goto bad_flags; - } - -#ifdef CONFIG_DEBUG_FS - if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { - /* Mask coherency flags if infinite cache is enabled to prevent - * the skipping of syncs from BASE side. 
- */ - *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | - BASE_MEM_COHERENT_SYSTEM); - } -#endif - - if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && - (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { - /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ - *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; - } - if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { - dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); - goto bad_flags; - } - if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { - /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ - *flags &= ~BASE_MEM_COHERENT_SYSTEM; - } - - if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent)) - goto bad_sizes; - - /* find out which VA zone to use */ - if (*flags & BASE_MEM_SAME_VA) { - rbtree = &kctx->reg_rbtree_same; - zone = KBASE_REG_ZONE_SAME_VA; - } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { - rbtree = &kctx->reg_rbtree_exec; - zone = KBASE_REG_ZONE_EXEC_VA; - } else { - rbtree = &kctx->reg_rbtree_custom; - zone = KBASE_REG_ZONE_CUSTOM_VA; - } - - reg = kbase_alloc_free_region(rbtree, 0, va_pages, zone); - if (!reg) { - dev_err(dev, "Failed to allocate free region"); - goto no_region; - } - - if (kbase_update_region_flags(kctx, reg, *flags) != 0) - goto invalid_flags; - - if (kbase_reg_prepare_native(reg, kctx) != 0) { - dev_err(dev, "Failed to prepare region"); - goto prepare_failed; - } - - if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { - /* kbase_check_alloc_sizes() already checks extent is valid for - * assigning to reg->extent */ - reg->extent = extent; - } else { - reg->extent = 0; - } - - if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { - dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", - (unsigned long long)commit_pages, - (unsigned long long)va_pages); - goto no_mem; - } - reg->initial_commit = commit_pages; - - kbase_gpu_vm_lock(kctx); - - if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { - /* Permanent kernel mappings must happen as soon as - * reg->cpu_alloc->pages is ready. Currently this happens after - * kbase_alloc_phy_pages(). If we move that to setup pages - * earlier, also move this call too - */ - int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, - commit_pages); - if (err < 0) { - kbase_gpu_vm_unlock(kctx); - goto no_kern_mapping; - } - } - - - /* mmap needed to setup VA? */ - if (*flags & BASE_MEM_SAME_VA) { - unsigned long prot = PROT_NONE; - unsigned long va_size = va_pages << PAGE_SHIFT; - unsigned long va_map = va_size; - unsigned long cookie, cookie_nr; - unsigned long cpu_addr; - - /* Bind to a cookie */ - if (!kctx->cookies) { - dev_err(dev, "No cookies available for allocation!"); - kbase_gpu_vm_unlock(kctx); - goto no_cookie; - } - /* return a cookie */ - cookie_nr = __ffs(kctx->cookies); - kctx->cookies &= ~(1UL << cookie_nr); - BUG_ON(kctx->pending_regions[cookie_nr]); - kctx->pending_regions[cookie_nr] = reg; - - kbase_gpu_vm_unlock(kctx); - - /* relocate to correct base */ - cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); - cookie <<= PAGE_SHIFT; - - /* - * 10.1-10.4 UKU userland relies on the kernel to call mmap. 
- * For all other versions we can just return the cookie - */ - if (kctx->api_version < KBASE_API_VERSION(10, 1) || - kctx->api_version > KBASE_API_VERSION(10, 4)) { - *gpu_va = (u64) cookie; - return reg; - } - if (*flags & BASE_MEM_PROT_CPU_RD) - prot |= PROT_READ; - if (*flags & BASE_MEM_PROT_CPU_WR) - prot |= PROT_WRITE; - - cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, - MAP_SHARED, cookie); - - if (IS_ERR_VALUE(cpu_addr)) { - kbase_gpu_vm_lock(kctx); - kctx->pending_regions[cookie_nr] = NULL; - kctx->cookies |= (1UL << cookie_nr); - kbase_gpu_vm_unlock(kctx); - goto no_mmap; - } - - *gpu_va = (u64) cpu_addr; - } else /* we control the VA */ { - if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { - dev_warn(dev, "Failed to map memory on GPU"); - kbase_gpu_vm_unlock(kctx); - goto no_mmap; - } - /* return real GPU VA */ - *gpu_va = reg->start_pfn << PAGE_SHIFT; - - kbase_gpu_vm_unlock(kctx); - } - - return reg; - -no_mmap: -no_cookie: -no_kern_mapping: -no_mem: - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); -invalid_flags: -prepare_failed: - kfree(reg); -no_region: -bad_sizes: -bad_flags: - return NULL; -} -KBASE_EXPORT_TEST_API(kbase_mem_alloc); - -int kbase_mem_query(struct kbase_context *kctx, - u64 gpu_addr, u64 query, u64 * const out) -{ - struct kbase_va_region *reg; - int ret = -EINVAL; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(out); - - if (gpu_addr & ~PAGE_MASK) { - dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); - return -EINVAL; - } - - kbase_gpu_vm_lock(kctx); - - /* Validate the region */ - reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) - goto out_unlock; - - switch (query) { - case KBASE_MEM_QUERY_COMMIT_SIZE: - if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { - *out = kbase_reg_current_backed_size(reg); - } else { - size_t i; - struct kbase_aliased *aliased; - *out = 0; - aliased = reg->cpu_alloc->imported.alias.aliased; - for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) - *out += aliased[i].length; - } - break; - case KBASE_MEM_QUERY_VA_SIZE: - *out = reg->nr_pages; - break; - case KBASE_MEM_QUERY_FLAGS: - { - *out = 0; - if (KBASE_REG_CPU_WR & reg->flags) - *out |= BASE_MEM_PROT_CPU_WR; - if (KBASE_REG_CPU_RD & reg->flags) - *out |= BASE_MEM_PROT_CPU_RD; - if (KBASE_REG_CPU_CACHED & reg->flags) - *out |= BASE_MEM_CACHED_CPU; - if (KBASE_REG_GPU_WR & reg->flags) - *out |= BASE_MEM_PROT_GPU_WR; - if (KBASE_REG_GPU_RD & reg->flags) - *out |= BASE_MEM_PROT_GPU_RD; - if (!(KBASE_REG_GPU_NX & reg->flags)) - *out |= BASE_MEM_PROT_GPU_EX; - if (KBASE_REG_SHARE_BOTH & reg->flags) - *out |= BASE_MEM_COHERENT_SYSTEM; - if (KBASE_REG_SHARE_IN & reg->flags) - *out |= BASE_MEM_COHERENT_LOCAL; - if (kctx->api_version >= KBASE_API_VERSION(11, 2)) { - /* Prior to 11.2, these were known about by user-side - * but we did not return them. 
Returning some of these - * caused certain clients that were not expecting them - * to fail, so we omit all of them as a special-case - * for compatibility reasons */ - if (KBASE_REG_PF_GROW & reg->flags) - *out |= BASE_MEM_GROW_ON_GPF; - if (KBASE_REG_SECURE & reg->flags) - *out |= BASE_MEM_SECURE; - } - if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) - *out |= BASE_MEM_TILER_ALIGN_TOP; - if (!(KBASE_REG_GPU_CACHED & reg->flags)) - *out |= BASE_MEM_UNCACHED_GPU; - if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) - *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; - - WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, - "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); - *out &= BASE_MEM_FLAGS_QUERYABLE; - break; - } - default: - *out = 0; - goto out_unlock; - } - - ret = 0; - -out_unlock: - kbase_gpu_vm_unlock(kctx); - return ret; -} - -/** - * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the - * Ephemeral memory eviction list. - * @s: Shrinker - * @sc: Shrinker control - * - * Return: Number of pages which can be freed. - */ -static -unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_context *kctx; - struct kbase_mem_phy_alloc *alloc; - unsigned long pages = 0; - - kctx = container_of(s, struct kbase_context, reclaim); - - mutex_lock(&kctx->jit_evict_lock); - - list_for_each_entry(alloc, &kctx->evict_list, evict_node) - pages += alloc->nents; - - mutex_unlock(&kctx->jit_evict_lock); - return pages; -} - -/** - * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction - * list for pages and try to reclaim them. - * @s: Shrinker - * @sc: Shrinker control - * - * Return: Number of pages freed (can be less then requested) or -1 if the - * shrinker failed to free pages in its pool. - * - * Note: - * This function accesses region structures without taking the region lock, - * this is required as the OOM killer can call the shrinker after the region - * lock has already been held. - * This is safe as we can guarantee that a region on the eviction list will - * not be freed (kbase_mem_free_region removes the allocation from the list - * before destroying it), or modified by other parts of the driver. - * The eviction list itself is guarded by the eviction lock and the MMU updates - * are protected by their own lock. - */ -static -unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_context *kctx; - struct kbase_mem_phy_alloc *alloc; - struct kbase_mem_phy_alloc *tmp; - unsigned long freed = 0; - - kctx = container_of(s, struct kbase_context, reclaim); - mutex_lock(&kctx->jit_evict_lock); - - list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { - int err; - - err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, - 0, alloc->nents); - if (err != 0) { - /* - * Failed to remove GPU mapping, tell the shrinker - * to stop trying to shrink our slab even though we - * have pages in it. - */ - freed = -1; - goto out_unlock; - } - - /* - * Update alloc->evicted before freeing the backing so the - * helper can determine that it needs to bypass the accounting - * and memory pool. - */ - alloc->evicted = alloc->nents; - - kbase_free_phy_pages_helper(alloc, alloc->evicted); - freed += alloc->evicted; - list_del_init(&alloc->evict_node); - - /* - * Inform the JIT allocator this region has lost backing - * as it might need to free the allocation. 
- */ - kbase_jit_backing_lost(alloc->reg); - - /* Enough pages have been freed so stop now */ - if (freed > sc->nr_to_scan) - break; - } -out_unlock: - mutex_unlock(&kctx->jit_evict_lock); - - return freed; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, - struct shrink_control *sc) -{ - if (sc->nr_to_scan == 0) - return kbase_mem_evictable_reclaim_count_objects(s, sc); - - return kbase_mem_evictable_reclaim_scan_objects(s, sc); -} -#endif - -int kbase_mem_evictable_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->evict_list); - mutex_init(&kctx->jit_evict_lock); - - /* Register shrinker */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) - kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; -#else - kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; - kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; -#endif - kctx->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) - kctx->reclaim.batch = 0; -#endif - register_shrinker(&kctx->reclaim); - return 0; -} - -void kbase_mem_evictable_deinit(struct kbase_context *kctx) -{ - unregister_shrinker(&kctx->reclaim); -} - -/** - * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. - * @alloc: The physical allocation - */ -void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) -{ - struct kbase_context *kctx = alloc->imported.native.kctx; - int __maybe_unused new_page_count; - - kbase_process_page_usage_dec(kctx, alloc->nents); - new_page_count = kbase_atomic_sub_pages(alloc->nents, - &kctx->used_pages); - kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); -} - -/** - * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. - * @alloc: The physical allocation - */ -static -void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) -{ - struct kbase_context *kctx = alloc->imported.native.kctx; - int __maybe_unused new_page_count; - - new_page_count = kbase_atomic_add_pages(alloc->nents, - &kctx->used_pages); - kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); - - /* Increase mm counters so that the allocation is accounted for - * against the process and thus is visible to the OOM killer, - */ - kbase_process_page_usage_inc(kctx, alloc->nents); - - KBASE_TLSTREAM_AUX_PAGESALLOC( - kctx->id, - (u64)new_page_count); -} - -int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) -{ - struct kbase_context *kctx = gpu_alloc->imported.native.kctx; - - lockdep_assert_held(&kctx->reg_lock); - - kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, - 0, gpu_alloc->nents); - - mutex_lock(&kctx->jit_evict_lock); - /* This allocation can't already be on a list. */ - WARN_ON(!list_empty(&gpu_alloc->evict_node)); - - /* - * Add the allocation to the eviction list, after this point the shrink - * can reclaim it. 
- */ - list_add(&gpu_alloc->evict_node, &kctx->evict_list); - mutex_unlock(&kctx->jit_evict_lock); - kbase_mem_evictable_mark_reclaim(gpu_alloc); - - gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; - return 0; -} - -bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) -{ - struct kbase_context *kctx = gpu_alloc->imported.native.kctx; - int err = 0; - - lockdep_assert_held(&kctx->reg_lock); - - mutex_lock(&kctx->jit_evict_lock); - /* - * First remove the allocation from the eviction list as it's no - * longer eligible for eviction. - */ - list_del_init(&gpu_alloc->evict_node); - mutex_unlock(&kctx->jit_evict_lock); - - if (gpu_alloc->evicted == 0) { - /* - * The backing is still present, update the VM stats as it's - * in use again. - */ - kbase_mem_evictable_unmark_reclaim(gpu_alloc); - } else { - /* If the region is still alive ... */ - if (gpu_alloc->reg) { - /* ... allocate replacement backing ... */ - err = kbase_alloc_phy_pages_helper(gpu_alloc, - gpu_alloc->evicted); - - /* - * ... and grow the mapping back to its - * pre-eviction size. - */ - if (!err) - err = kbase_mem_grow_gpu_mapping(kctx, - gpu_alloc->reg, - gpu_alloc->evicted, 0); - - gpu_alloc->evicted = 0; - } - } - - /* If the region is still alive remove the DONT_NEED attribute. */ - if (gpu_alloc->reg) - gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; - - return (err == 0); -} - -int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) -{ - struct kbase_va_region *reg; - int ret = -EINVAL; - unsigned int real_flags = 0; - unsigned int prev_flags = 0; - bool prev_needed, new_needed; - - KBASE_DEBUG_ASSERT(kctx); - - if (!gpu_addr) - return -EINVAL; - - if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) - return -EINVAL; - - /* nuke other bits */ - flags &= mask; - - /* check for only supported flags */ - if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) - goto out; - - /* mask covers bits we don't support? */ - if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) - goto out; - - /* convert flags */ - if (BASE_MEM_COHERENT_SYSTEM & flags) - real_flags |= KBASE_REG_SHARE_BOTH; - else if (BASE_MEM_COHERENT_LOCAL & flags) - real_flags |= KBASE_REG_SHARE_IN; - - /* now we can lock down the context, and find the region */ - down_write(¤t->mm->mmap_sem); - kbase_gpu_vm_lock(kctx); - - /* Validate the region */ - reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) - goto out_unlock; - - /* Is the region being transitioning between not needed and needed? */ - prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; - new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; - if (prev_needed != new_needed) { - /* Aliased allocations can't be made ephemeral */ - if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) - goto out_unlock; - - if (new_needed) { - /* Only native allocations can be marked not needed */ - if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { - ret = -EINVAL; - goto out_unlock; - } - ret = kbase_mem_evictable_make(reg->gpu_alloc); - if (ret) - goto out_unlock; - } else { - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - } - - /* limit to imported memory */ - if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) - goto out_unlock; - - /* shareability flags are ignored for GPU uncached memory */ - if (!(reg->flags & KBASE_REG_GPU_CACHED)) { - ret = 0; - goto out_unlock; - } - - /* no change? 
*/ - if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { - ret = 0; - goto out_unlock; - } - - /* save for roll back */ - prev_flags = reg->flags; - reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); - reg->flags |= real_flags; - - /* Currently supporting only imported memory */ -#ifdef CONFIG_DMA_SHARED_BUFFER - if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - /* Future use will use the new flags, existing mapping will NOT be updated - * as memory should not be in use by the GPU when updating the flags. - */ - ret = 0; - WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); - } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - - /* roll back on error */ - if (ret) - reg->flags = prev_flags; - -out_unlock: - kbase_gpu_vm_unlock(kctx); - up_write(¤t->mm->mmap_sem); -out: - return ret; -} - -#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) - -#ifdef CONFIG_DMA_SHARED_BUFFER -static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, - int fd, u64 *va_pages, u64 *flags, u32 padding) -{ - struct kbase_va_region *reg; - struct dma_buf *dma_buf; - struct dma_buf_attachment *dma_attachment; - bool shared_zone = false; - - dma_buf = dma_buf_get(fd); - if (IS_ERR_OR_NULL(dma_buf)) - goto no_buf; - - dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); - if (!dma_attachment) - goto no_attachment; - - *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; - if (!*va_pages) - goto bad_size; - - if (*va_pages > (U64_MAX / PAGE_SIZE)) - /* 64-bit address range is the max */ - goto bad_size; - - /* ignore SAME_VA */ - *flags &= ~BASE_MEM_SAME_VA; - - if (*flags & BASE_MEM_IMPORT_SHARED) - shared_zone = true; - -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - /* - * 64-bit tasks require us to reserve VA on the CPU that we use - * on the GPU. 
- */ - shared_zone = true; - } -#endif - - if (shared_zone) { - *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, - 0, *va_pages, KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); - } - - if (!reg) - goto no_region; - - if (kbase_update_region_flags(kctx, reg, *flags) != 0) - goto invalid_flags; - - reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, - KBASE_MEM_TYPE_IMPORTED_UMM); - if (IS_ERR_OR_NULL(reg->gpu_alloc)) - goto no_alloc_obj; - - reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - - /* No pages to map yet */ - reg->gpu_alloc->nents = 0; - - reg->flags &= ~KBASE_REG_FREE; - reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ - reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ - - if (*flags & BASE_MEM_SECURE) - reg->flags |= KBASE_REG_SECURE; - - if (padding) - reg->flags |= KBASE_REG_IMPORT_PAD; - - reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; - reg->gpu_alloc->imported.umm.sgt = NULL; - reg->gpu_alloc->imported.umm.dma_buf = dma_buf; - reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; - reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; - reg->extent = 0; - - return reg; - -no_alloc_obj: -invalid_flags: - kfree(reg); -no_region: -bad_size: - dma_buf_detach(dma_buf, dma_attachment); -no_attachment: - dma_buf_put(dma_buf); -no_buf: - return NULL; -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ - -u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev) -{ - u32 cpu_cache_line_size = cache_line_size(); - u32 gpu_cache_line_size = - (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); - - return ((cpu_cache_line_size > gpu_cache_line_size) ? - cpu_cache_line_size : - gpu_cache_line_size); -} - -static struct kbase_va_region *kbase_mem_from_user_buffer( - struct kbase_context *kctx, unsigned long address, - unsigned long size, u64 *va_pages, u64 *flags) -{ - long i; - struct kbase_va_region *reg; - struct rb_root *rbtree; - long faulted_pages; - int zone = KBASE_REG_ZONE_CUSTOM_VA; - bool shared_zone = false; - u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); - struct kbase_alloc_import_user_buf *user_buf; - struct page **pages = NULL; - - if ((address & (cache_line_alignment - 1)) != 0 || - (size & (cache_line_alignment - 1)) != 0) { - if (*flags & BASE_MEM_UNCACHED_GPU) { - dev_warn(kctx->kbdev->dev, - "User buffer is not cache line aligned and marked as GPU uncached\n"); - goto bad_size; - } - - /* Coherency must be enabled to handle partial cache lines */ - if (*flags & (BASE_MEM_COHERENT_SYSTEM | - BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { - /* Force coherent system required flag, import will - * then fail if coherency isn't available - */ - *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; - } else { - dev_warn(kctx->kbdev->dev, - "User buffer is not cache line aligned and no coherency enabled\n"); - goto bad_size; - } - } - - *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - - PFN_DOWN(address); - if (!*va_pages) - goto bad_size; - - if (*va_pages > (UINT64_MAX / PAGE_SIZE)) - /* 64-bit address range is the max */ - goto bad_size; - - /* SAME_VA generally not supported with imported memory (no known use cases) */ - *flags &= ~BASE_MEM_SAME_VA; - - if (*flags & BASE_MEM_IMPORT_SHARED) - shared_zone = true; - -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - /* - * 64-bit tasks require us to reserve VA on the CPU that we use - * on the GPU. 
- */ - shared_zone = true; - } -#endif - - if (shared_zone) { - *flags |= BASE_MEM_NEED_MMAP; - zone = KBASE_REG_ZONE_SAME_VA; - rbtree = &kctx->reg_rbtree_same; - } else - rbtree = &kctx->reg_rbtree_custom; - - reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone); - - if (!reg) - goto no_region; - - reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, - KBASE_MEM_TYPE_IMPORTED_USER_BUF); - if (IS_ERR_OR_NULL(reg->gpu_alloc)) - goto no_alloc_obj; - - reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - - if (kbase_update_region_flags(kctx, reg, *flags) != 0) - goto invalid_flags; - - reg->flags &= ~KBASE_REG_FREE; - reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ - reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ - - user_buf = ®->gpu_alloc->imported.user_buf; - - user_buf->size = size; - user_buf->address = address; - user_buf->nr_pages = *va_pages; - user_buf->mm = current->mm; -#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE - atomic_inc(¤t->mm->mm_count); -#else - mmgrab(current->mm); -#endif - if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) - user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); - else - user_buf->pages = kmalloc_array(*va_pages, - sizeof(struct page *), GFP_KERNEL); - - if (!user_buf->pages) - goto no_page_array; - - /* If the region is coherent with the CPU then the memory is imported - * and mapped onto the GPU immediately. - * Otherwise get_user_pages is called as a sanity check, but with - * NULL as the pages argument which will fault the pages, but not - * pin them. The memory will then be pinned only around the jobs that - * specify the region as an external resource. - */ - if (reg->flags & KBASE_REG_SHARE_BOTH) { - pages = user_buf->pages; - *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; - } - - down_read(¤t->mm->mmap_sem); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) - faulted_pages = get_user_pages(current, current->mm, address, *va_pages, - reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) - faulted_pages = get_user_pages(address, *va_pages, - reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); -#else - faulted_pages = get_user_pages(address, *va_pages, - reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, - pages, NULL); -#endif - - up_read(¤t->mm->mmap_sem); - - if (faulted_pages != *va_pages) - goto fault_mismatch; - - reg->gpu_alloc->nents = 0; - reg->extent = 0; - - if (pages) { - struct device *dev = kctx->kbdev->dev; - unsigned long local_size = user_buf->size; - unsigned long offset = user_buf->address & ~PAGE_MASK; - struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); - - /* Top bit signifies that this was pinned on import */ - user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; - - for (i = 0; i < faulted_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) - goto unwind_dma_map; - - user_buf->dma_addrs[i] = dma_addr; - pa[i] = as_tagged(page_to_phys(pages[i])); - - local_size -= min; - offset = 0; - } - - reg->gpu_alloc->nents = faulted_pages; - } - - return reg; - -unwind_dma_map: - while (i--) { - dma_unmap_page(kctx->kbdev->dev, - user_buf->dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - } -fault_mismatch: - if (pages) { - for (i = 0; i < faulted_pages; i++) - put_page(pages[i]); - } -no_page_array: -invalid_flags: - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); -no_alloc_obj: - kfree(reg); -no_region: -bad_size: - return NULL; - -} - - -u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, - u64 nents, struct base_mem_aliasing_info *ai, - u64 *num_pages) -{ - struct kbase_va_region *reg; - u64 gpu_va; - size_t i; - bool coherent; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(flags); - KBASE_DEBUG_ASSERT(ai); - KBASE_DEBUG_ASSERT(num_pages); - - /* mask to only allowed flags */ - *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | - BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | - BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); - - if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { - dev_warn(kctx->kbdev->dev, - "kbase_mem_alias called with bad flags (%llx)", - (unsigned long long)*flags); - goto bad_flags; - } - coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || - (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; - - if (!stride) - goto bad_stride; - - if (!nents) - goto bad_nents; - - if ((nents * stride) > (U64_MAX / PAGE_SIZE)) - /* 64-bit address range is the max */ - goto bad_size; - - /* calculate the number of pages this alias will cover */ - *num_pages = nents * stride; - -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - /* 64-bit tasks must MMAP anyway, but not expose this address to - * clients */ - *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, - *num_pages, - KBASE_REG_ZONE_SAME_VA); - } else { -#else - if (1) { -#endif - reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } - - if (!reg) - goto no_reg; - - /* zero-sized page array, as we don't need one/can support one */ - reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS); - if (IS_ERR_OR_NULL(reg->gpu_alloc)) - goto no_alloc_obj; - - reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - - if (kbase_update_region_flags(kctx, reg, *flags) != 0) - goto invalid_flags; - - reg->gpu_alloc->imported.alias.nents = nents; - reg->gpu_alloc->imported.alias.stride = stride; - reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); - if 
(!reg->gpu_alloc->imported.alias.aliased) - goto no_aliased_array; - - kbase_gpu_vm_lock(kctx); - - /* validate and add src handles */ - for (i = 0; i < nents; i++) { - if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { - if (ai[i].handle.basep.handle != - BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) - goto bad_handle; /* unsupported magic handle */ - if (!ai[i].length) - goto bad_handle; /* must be > 0 */ - if (ai[i].length > stride) - goto bad_handle; /* can't be larger than the - stride */ - reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; - } else { - struct kbase_va_region *aliasing_reg; - struct kbase_mem_phy_alloc *alloc; - - aliasing_reg = kbase_region_tracker_find_region_base_address( - kctx, - (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); - - /* validate found region */ - if (!aliasing_reg) - goto bad_handle; /* Not found */ - if (aliasing_reg->flags & KBASE_REG_FREE) - goto bad_handle; /* Free region */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) - goto bad_handle; /* Ephemeral region */ - if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) - goto bad_handle; /* GPU uncached memory */ - if (!aliasing_reg->gpu_alloc) - goto bad_handle; /* No alloc */ - if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) - goto bad_handle; /* Not a native alloc */ - if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) - goto bad_handle; - /* Non-coherent memory cannot alias - coherent memory, and vice versa.*/ - - /* check size against stride */ - if (!ai[i].length) - goto bad_handle; /* must be > 0 */ - if (ai[i].length > stride) - goto bad_handle; /* can't be larger than the - stride */ - - alloc = aliasing_reg->gpu_alloc; - - /* check against the alloc's size */ - if (ai[i].offset > alloc->nents) - goto bad_handle; /* beyond end */ - if (ai[i].offset + ai[i].length > alloc->nents) - goto bad_handle; /* beyond end */ - - reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); - reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; - reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; - } - } - -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - /* Bind to a cookie */ - if (!kctx->cookies) { - dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); - goto no_cookie; - } - /* return a cookie */ - gpu_va = __ffs(kctx->cookies); - kctx->cookies &= ~(1UL << gpu_va); - BUG_ON(kctx->pending_regions[gpu_va]); - kctx->pending_regions[gpu_va] = reg; - - /* relocate to correct base */ - gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); - gpu_va <<= PAGE_SHIFT; - } else /* we control the VA */ { -#else - if (1) { -#endif - if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { - dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); - goto no_mmap; - } - /* return real GPU VA */ - gpu_va = reg->start_pfn << PAGE_SHIFT; - } - - reg->flags &= ~KBASE_REG_FREE; - reg->flags &= ~KBASE_REG_GROWABLE; - - kbase_gpu_vm_unlock(kctx); - - return gpu_va; - -#ifdef CONFIG_64BIT -no_cookie: -#endif -no_mmap: -bad_handle: - kbase_gpu_vm_unlock(kctx); -no_aliased_array: -invalid_flags: - kbase_mem_phy_alloc_put(reg->cpu_alloc); - kbase_mem_phy_alloc_put(reg->gpu_alloc); -no_alloc_obj: - kfree(reg); -no_reg: -bad_size: -bad_nents: -bad_stride: -bad_flags: - return 0; -} - -int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, - void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, - u64 *flags) -{ - struct kbase_va_region *reg; - - KBASE_DEBUG_ASSERT(kctx); - 
KBASE_DEBUG_ASSERT(gpu_va); - KBASE_DEBUG_ASSERT(va_pages); - KBASE_DEBUG_ASSERT(flags); - - if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && - kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) - *flags |= BASE_MEM_SAME_VA; - - if (!kbase_check_import_flags(*flags)) { - dev_warn(kctx->kbdev->dev, - "kbase_mem_import called with bad flags (%llx)", - (unsigned long long)*flags); - goto bad_flags; - } - - if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && - (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { - /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ - *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; - } - if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { - dev_warn(kctx->kbdev->dev, - "kbase_mem_import call required coherent mem when unavailable"); - goto bad_flags; - } - if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && - !kbase_device_is_cpu_coherent(kctx->kbdev)) { - /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ - *flags &= ~BASE_MEM_COHERENT_SYSTEM; - } - - if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { - dev_warn(kctx->kbdev->dev, - "padding is only supported for UMM"); - goto bad_flags; - } - - switch (type) { -#ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: { - int fd; - - if (get_user(fd, (int __user *)phandle)) - reg = NULL; - else - reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, - padding); - } - break; -#endif /* CONFIG_DMA_SHARED_BUFFER */ - case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { - struct base_mem_import_user_buffer user_buffer; - void __user *uptr; - - if (copy_from_user(&user_buffer, phandle, - sizeof(user_buffer))) { - reg = NULL; - } else { -#ifdef CONFIG_COMPAT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) - uptr = compat_ptr(user_buffer.ptr); - else -#endif - uptr = u64_to_user_ptr(user_buffer.ptr); - - reg = kbase_mem_from_user_buffer(kctx, - (unsigned long)uptr, user_buffer.length, - va_pages, flags); - } - break; - } - default: { - reg = NULL; - break; - } - } - - if (!reg) - goto no_reg; - - kbase_gpu_vm_lock(kctx); - - /* mmap needed to setup VA? 
 */
- if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
- /* Bind to a cookie */
- if (!kctx->cookies)
- goto no_cookie;
- /* return a cookie */
- *gpu_va = __ffs(kctx->cookies);
- kctx->cookies &= ~(1UL << *gpu_va);
- BUG_ON(kctx->pending_regions[*gpu_va]);
- kctx->pending_regions[*gpu_va] = reg;
-
- /* relocate to correct base */
- *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
- *gpu_va <<= PAGE_SHIFT;
-
- } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
- /* we control the VA, mmap now to the GPU */
- if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
- goto no_gpu_va;
- /* return real GPU VA */
- *gpu_va = reg->start_pfn << PAGE_SHIFT;
- } else {
- /* we control the VA, but nothing to mmap yet */
- if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
- goto no_gpu_va;
- /* return real GPU VA */
- *gpu_va = reg->start_pfn << PAGE_SHIFT;
- }
-
- /* clear out private flags */
- *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
-
- kbase_gpu_vm_unlock(kctx);
-
- return 0;
-
-no_gpu_va:
-no_cookie:
- kbase_gpu_vm_unlock(kctx);
- kbase_mem_phy_alloc_put(reg->cpu_alloc);
- kbase_mem_phy_alloc_put(reg->gpu_alloc);
- kfree(reg);
-no_reg:
-bad_flags:
- *gpu_va = 0;
- *va_pages = 0;
- *flags = 0;
- return -ENOMEM;
-}
-
-int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages)
-{
- struct tagged_addr *phy_pages;
- u64 delta = new_pages - old_pages;
- int ret = 0;
-
- lockdep_assert_held(&kctx->reg_lock);
-
- /* Map the new pages into the GPU */
- phy_pages = kbase_get_gpu_phy_pages(reg);
- ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
- phy_pages + old_pages, delta, reg->flags, kctx->as_nr);
-
- return ret;
-}
-
-void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages)
-{
- u64 gpu_va_start = reg->start_pfn;
-
- if (new_pages == old_pages)
- /* Nothing to do */
- return;
-
- unmap_mapping_range(kctx->filp->f_inode->i_mapping,
- (gpu_va_start + new_pages)<<PAGE_SHIFT,
- (old_pages - new_pages)<<PAGE_SHIFT, 1);
-}
-
-int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
- struct kbase_va_region *reg,
- u64 new_pages, u64 old_pages)
-{
- u64 delta = old_pages - new_pages;
- int ret = 0;
-
- ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn + new_pages, delta, kctx->as_nr);
-
- return ret;
-}
-
-int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
-{
- u64 old_pages;
- u64 delta;
- int res = -EINVAL;
- struct kbase_va_region *reg;
- bool read_locked = false;
-
- KBASE_DEBUG_ASSERT(kctx);
- KBASE_DEBUG_ASSERT(gpu_addr != 0);
-
- if (gpu_addr & ~PAGE_MASK) {
- dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
- return -EINVAL;
- }
-
- down_write(&current->mm->mmap_sem);
- kbase_gpu_vm_lock(kctx);
-
- /* Validate the region */
- reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
- if (!reg || (reg->flags & KBASE_REG_FREE))
- goto out_unlock;
-
- KBASE_DEBUG_ASSERT(reg->cpu_alloc);
- KBASE_DEBUG_ASSERT(reg->gpu_alloc);
-
- if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
- goto out_unlock;
-
- if (0 == (reg->flags & KBASE_REG_GROWABLE))
- goto out_unlock;
-
- /* Would overflow the VA region */
- if (new_pages > reg->nr_pages)
- goto out_unlock;
-
- /* can't be mapped more than once on the GPU */
- if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
- goto out_unlock;
- /* can't grow regions which are ephemeral */
- if (reg->flags & KBASE_REG_DONT_NEED)
- goto out_unlock;
-
- if (new_pages == reg->gpu_alloc->nents) {
- /* no change */
- res = 0;
- goto out_unlock;
- }
-
- old_pages = kbase_reg_current_backed_size(reg);
- if (new_pages > old_pages) {
- delta = new_pages - old_pages;
-
- /*
- * No update to 
the mm so downgrade the writer lock to a read - * lock so other readers aren't blocked after this point. - */ - downgrade_write(¤t->mm->mmap_sem); - read_locked = true; - - /* Allocate some more pages */ - if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { - res = -ENOMEM; - goto out_unlock; - } - if (reg->cpu_alloc != reg->gpu_alloc) { - if (kbase_alloc_phy_pages_helper( - reg->gpu_alloc, delta) != 0) { - res = -ENOMEM; - kbase_free_phy_pages_helper(reg->cpu_alloc, - delta); - goto out_unlock; - } - } - - /* No update required for CPU mappings, that's done on fault. */ - - /* Update GPU mapping. */ - res = kbase_mem_grow_gpu_mapping(kctx, reg, - new_pages, old_pages); - - /* On error free the new pages */ - if (res) { - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, - delta); - res = -ENOMEM; - goto out_unlock; - } - } else { - delta = old_pages - new_pages; - - /* Update all CPU mapping(s) */ - kbase_mem_shrink_cpu_mapping(kctx, reg, - new_pages, old_pages); - - /* Update the GPU mapping */ - res = kbase_mem_shrink_gpu_mapping(kctx, reg, - new_pages, old_pages); - if (res) { - res = -ENOMEM; - goto out_unlock; - } - - kbase_free_phy_pages_helper(reg->cpu_alloc, delta); - if (reg->cpu_alloc != reg->gpu_alloc) - kbase_free_phy_pages_helper(reg->gpu_alloc, delta); - } - -out_unlock: - kbase_gpu_vm_unlock(kctx); - if (read_locked) - up_read(¤t->mm->mmap_sem); - else - up_write(¤t->mm->mmap_sem); - - return res; -} - -static void kbase_cpu_vm_open(struct vm_area_struct *vma) -{ - struct kbase_cpu_mapping *map = vma->vm_private_data; - - KBASE_DEBUG_ASSERT(map); - KBASE_DEBUG_ASSERT(map->count > 0); - /* non-atomic as we're under Linux' mm lock */ - map->count++; -} - -static void kbase_cpu_vm_close(struct vm_area_struct *vma) -{ - struct kbase_cpu_mapping *map = vma->vm_private_data; - - KBASE_DEBUG_ASSERT(map); - KBASE_DEBUG_ASSERT(map->count > 0); - - /* non-atomic as we're under Linux' mm lock */ - if (--map->count) - return; - - KBASE_DEBUG_ASSERT(map->kctx); - KBASE_DEBUG_ASSERT(map->alloc); - - kbase_gpu_vm_lock(map->kctx); - - if (map->free_on_close) { - KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_SAME_VA); - /* Avoid freeing memory on the process death which results in - * GPU Page Fault. 
Memory will be freed in kbase_destroy_context - */ - if (!(current->flags & PF_EXITING)) - kbase_mem_free_region(map->kctx, map->region); - } - - list_del(&map->mappings_list); - - kbase_gpu_vm_unlock(map->kctx); - - kbase_mem_phy_alloc_put(map->alloc); - kfree(map); -} - -KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); - - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) -static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ -#else -static int kbase_cpu_vm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; -#endif - struct kbase_cpu_mapping *map = vma->vm_private_data; - pgoff_t rel_pgoff; - size_t i; - pgoff_t addr; - - KBASE_DEBUG_ASSERT(map); - KBASE_DEBUG_ASSERT(map->count > 0); - KBASE_DEBUG_ASSERT(map->kctx); - KBASE_DEBUG_ASSERT(map->alloc); - - rel_pgoff = vmf->pgoff - map->region->start_pfn; - - kbase_gpu_vm_lock(map->kctx); - if (rel_pgoff >= map->alloc->nents) - goto locked_bad_fault; - - /* Fault on access to DONT_NEED regions */ - if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) - goto locked_bad_fault; - - /* insert all valid pages from the fault location */ - i = rel_pgoff; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT); -#else - addr = (pgoff_t)(vmf->address >> PAGE_SHIFT); -#endif - while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) { - int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT, - PFN_DOWN(as_phys_addr_t(map->alloc->pages[i]))); - if (ret < 0 && ret != -EBUSY) - goto locked_bad_fault; - - i++; addr++; - } - - kbase_gpu_vm_unlock(map->kctx); - /* we resolved it, nothing for VM to do */ - return VM_FAULT_NOPAGE; - -locked_bad_fault: - kbase_gpu_vm_unlock(map->kctx); - return VM_FAULT_SIGBUS; -} - -const struct vm_operations_struct kbase_vm_ops = { - .open = kbase_cpu_vm_open, - .close = kbase_cpu_vm_close, - .fault = kbase_cpu_vm_fault -}; - -static int kbase_cpu_mmap(struct kbase_context *kctx, - struct kbase_va_region *reg, - struct vm_area_struct *vma, - void *kaddr, - size_t nr_pages, - unsigned long aligned_offset, - int free_on_close) -{ - struct kbase_cpu_mapping *map; - struct tagged_addr *page_array; - int err = 0; - int i; - u64 start_off; - - map = kzalloc(sizeof(*map), GFP_KERNEL); - - if (!map) { - WARN_ON(1); - err = -ENOMEM; - goto out; - } - - /* - * VM_DONTCOPY - don't make this mapping available in fork'ed processes - * VM_DONTEXPAND - disable mremap on this region - * VM_IO - disables paging - * VM_DONTDUMP - Don't include in core dumps (3.7 only) - * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. - * This is needed to support using the dedicated and - * the OS based memory backends together. 
- */ - /* - * This will need updating to propagate coherency flags - * See MIDBASE-1057 - */ - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; -#else - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; -#endif - vma->vm_ops = &kbase_vm_ops; - vma->vm_private_data = map; - - page_array = kbase_get_cpu_phy_pages(reg); - start_off = vma->vm_pgoff - reg->start_pfn + - (aligned_offset >> PAGE_SHIFT); - if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { - struct kbase_aliased *aliased = - reg->cpu_alloc->imported.alias.aliased; - - if (!reg->cpu_alloc->imported.alias.stride || - reg->nr_pages < (start_off + nr_pages)) { - err = -EINVAL; - goto out; - } - - while (start_off >= reg->cpu_alloc->imported.alias.stride) { - aliased++; - start_off -= reg->cpu_alloc->imported.alias.stride; - } - - if (!aliased->alloc) { - /* sink page not available for dumping map */ - err = -EINVAL; - goto out; - } - - if ((start_off + nr_pages) > aliased->length) { - /* not fully backed by physical pages */ - err = -EINVAL; - goto out; - } - - /* ready the pages for dumping map */ - page_array = aliased->alloc->pages + aliased->offset; - } - - if (!(reg->flags & KBASE_REG_CPU_CACHED) && - (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { - /* We can't map vmalloc'd memory uncached. - * Other memory will have been returned from - * kbase_mem_pool which would be - * suitable for mapping uncached. - */ - BUG_ON(kaddr); - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - } - - if (!kaddr) { - unsigned long addr = vma->vm_start + aligned_offset; - - vma->vm_flags |= VM_PFNMAP; - for (i = 0; i < nr_pages; i++) { - phys_addr_t phys; - - phys = as_phys_addr_t(page_array[i + start_off]); - err = vm_insert_pfn(vma, addr, PFN_DOWN(phys)); - if (WARN_ON(err)) - break; - - addr += PAGE_SIZE; - } - } else { - WARN_ON(aligned_offset); - /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ - vma->vm_flags |= VM_MIXEDMAP; - /* vmalloc remaping is easy... 
*/ - err = remap_vmalloc_range(vma, kaddr, 0); - WARN_ON(err); - } - - if (err) { - kfree(map); - goto out; - } - - map->region = reg; - map->free_on_close = free_on_close; - map->kctx = kctx; - map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - map->count = 1; /* start with one ref */ - - if (reg->flags & KBASE_REG_CPU_CACHED) - map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - - list_add(&map->mappings_list, &map->alloc->mappings); - - out: - return err; -} - -static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr) -{ - struct kbase_va_region *new_reg; - void *kaddr; - u32 nr_pages; - size_t size; - int err = 0; - - dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); - size = (vma->vm_end - vma->vm_start); - nr_pages = size >> PAGE_SHIFT; - - kaddr = kbase_mmu_dump(kctx, nr_pages); - - if (!kaddr) { - err = -ENOMEM; - goto out; - } - - new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages, - KBASE_REG_ZONE_SAME_VA); - if (!new_reg) { - err = -ENOMEM; - WARN_ON(1); - goto out; - } - - new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW); - if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { - err = -ENOMEM; - new_reg->cpu_alloc = NULL; - WARN_ON(1); - goto out_no_alloc; - } - - new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); - - new_reg->flags &= ~KBASE_REG_FREE; - new_reg->flags |= KBASE_REG_CPU_CACHED; - if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { - err = -ENOMEM; - WARN_ON(1); - goto out_va_region; - } - - *kmap_addr = kaddr; - *reg = new_reg; - - dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); - return 0; - -out_no_alloc: -out_va_region: - kbase_free_alloced_region(new_reg); -out: - return err; -} - - -void kbase_os_mem_map_lock(struct kbase_context *kctx) -{ - struct mm_struct *mm = current->mm; - (void)kctx; - down_read(&mm->mmap_sem); -} - -void kbase_os_mem_map_unlock(struct kbase_context *kctx) -{ - struct mm_struct *mm = current->mm; - (void)kctx; - up_read(&mm->mmap_sem); -} - -static int kbasep_reg_mmap(struct kbase_context *kctx, - struct vm_area_struct *vma, - struct kbase_va_region **regm, - size_t *nr_pages, size_t *aligned_offset) - -{ - int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); - struct kbase_va_region *reg; - int err = 0; - - *aligned_offset = 0; - - dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); - - /* SAME_VA stuff, fetch the right region */ - reg = kctx->pending_regions[cookie]; - if (!reg) { - err = -ENOMEM; - goto out; - } - - if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { - /* incorrect mmap size */ - /* leave the cookie for a potential later - * mapping, or to be reclaimed later when the - * context is freed */ - err = -ENOMEM; - goto out; - } - - if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || - (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { - /* VM flags inconsistent with region flags */ - err = -EPERM; - dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", - __FILE__, __LINE__); - goto out; - } - - /* adjust down nr_pages to what we have physically */ - *nr_pages = kbase_reg_current_backed_size(reg); - - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, - reg->nr_pages, 1) != 0) { - dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); - /* Unable to map in GPU space. 
*/ - WARN_ON(1); - err = -ENOMEM; - goto out; - } - /* no need for the cookie anymore */ - kctx->pending_regions[cookie] = NULL; - kctx->cookies |= (1UL << cookie); - - /* - * Overwrite the offset with the region start_pfn, so we effectively - * map from offset 0 in the region. However subtract the aligned - * offset so that when user space trims the mapping the beginning of - * the trimmed VMA has the correct vm_pgoff; - */ - vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); -out: - *regm = reg; - dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n"); - - return err; -} - -int kbase_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct kbase_context *kctx = file->private_data; - struct kbase_va_region *reg = NULL; - void *kaddr = NULL; - size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - int err = 0; - int free_on_close = 0; - struct device *dev = kctx->kbdev->dev; - size_t aligned_offset = 0; - - dev_dbg(dev, "kbase_mmap\n"); - - if (!(vma->vm_flags & VM_READ)) - vma->vm_flags &= ~VM_MAYREAD; - if (!(vma->vm_flags & VM_WRITE)) - vma->vm_flags &= ~VM_MAYWRITE; - - if (0 == nr_pages) { - err = -EINVAL; - goto out; - } - - if (!(vma->vm_flags & VM_SHARED)) { - err = -EINVAL; - goto out; - } - - kbase_gpu_vm_lock(kctx); - - if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { - /* The non-mapped tracking helper page */ - err = kbase_tracking_page_setup(kctx, vma); - goto out_unlock; - } - - /* if not the MTP, verify that the MTP has been mapped */ - rcu_read_lock(); - /* catches both when the special page isn't present or - * when we've forked */ - if (rcu_dereference(kctx->process_mm) != current->mm) { - err = -EINVAL; - rcu_read_unlock(); - goto out_unlock; - } - rcu_read_unlock(); - - switch (vma->vm_pgoff) { - case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): - case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): - /* Illegal handle for direct map */ - err = -EINVAL; - goto out_unlock; - case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): - /* MMU dump */ - err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); - if (0 != err) - goto out_unlock; - /* free the region on munmap */ - free_on_close = 1; - break; - case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... - PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { - err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, - &aligned_offset); - if (0 != err) - goto out_unlock; - /* free the region on munmap */ - free_on_close = 1; - break; - } - default: { - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - (u64)vma->vm_pgoff << PAGE_SHIFT); - - if (reg && !(reg->flags & KBASE_REG_FREE)) { - /* will this mapping overflow the size of the region? 
*/ - if (nr_pages > (reg->nr_pages - - (vma->vm_pgoff - reg->start_pfn))) { - err = -ENOMEM; - goto out_unlock; - } - - if ((vma->vm_flags & VM_READ && - !(reg->flags & KBASE_REG_CPU_RD)) || - (vma->vm_flags & VM_WRITE && - !(reg->flags & KBASE_REG_CPU_WR))) { - /* VM flags inconsistent with region flags */ - err = -EPERM; - dev_err(dev, "%s:%d inconsistent VM flags\n", - __FILE__, __LINE__); - goto out_unlock; - } - -#ifdef CONFIG_DMA_SHARED_BUFFER - if (KBASE_MEM_TYPE_IMPORTED_UMM == - reg->cpu_alloc->type) { - if (0 != (vma->vm_pgoff - reg->start_pfn)) { - err = -EINVAL; - dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", - __FILE__, __LINE__); - goto out_unlock; - } - err = dma_buf_mmap( - reg->cpu_alloc->imported.umm.dma_buf, - vma, vma->vm_pgoff - reg->start_pfn); - goto out_unlock; - } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - - if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { - /* initial params check for aliased dumping map */ - if (nr_pages > reg->gpu_alloc->imported.alias.stride || - !reg->gpu_alloc->imported.alias.stride || - !nr_pages) { - err = -EINVAL; - dev_warn(dev, "mmap aliased: invalid params!\n"); - goto out_unlock; - } - } - else if (reg->cpu_alloc->nents < - (vma->vm_pgoff - reg->start_pfn + nr_pages)) { - /* limit what we map to the amount currently backed */ - if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) - nr_pages = 0; - else - nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); - } - } else { - err = -ENOMEM; - goto out_unlock; - } - } /* default */ - } /* switch */ - - err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, - free_on_close); - - if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { - /* MMU dump - userspace should now have a reference on - * the pages, so we can now free the kernel mapping */ - vfree(kaddr); - } - -out_unlock: - kbase_gpu_vm_unlock(kctx); -out: - if (err) - dev_err(dev, "mmap failed %d\n", err); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_mmap); - -void kbase_sync_mem_regions(struct kbase_context *kctx, - struct kbase_vmap_struct *map, enum kbase_sync_type dest) -{ - size_t i; - off_t const offset = map->offset_in_page; - size_t const page_count = PFN_UP(offset + map->size); - - /* Sync first page */ - size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size); - struct tagged_addr cpu_pa = map->cpu_pages[0]; - struct tagged_addr gpu_pa = map->gpu_pages[0]; - - kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest); - - /* Sync middle pages (if any) */ - for (i = 1; page_count > 2 && i < page_count - 1; i++) { - cpu_pa = map->cpu_pages[i]; - gpu_pa = map->gpu_pages[i]; - kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest); - } - - /* Sync last page (if any) */ - if (page_count > 1) { - cpu_pa = map->cpu_pages[page_count - 1]; - gpu_pa = map->gpu_pages[page_count - 1]; - sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1; - kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest); - } -} - -static int kbase_vmap_phy_pages(struct kbase_context *kctx, - struct kbase_va_region *reg, u64 offset_bytes, size_t size, - struct kbase_vmap_struct *map) -{ - unsigned long page_index; - unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; - size_t page_count = PFN_UP(offset_in_page + size); - struct tagged_addr *page_array; - struct page **pages; - void *cpu_addr = NULL; - pgprot_t prot; - size_t i; - - if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) - return -EINVAL; - - /* check if page_count calculation will 
wrap */ - if (size > ((size_t)-1 / PAGE_SIZE)) - return -EINVAL; - - page_index = offset_bytes >> PAGE_SHIFT; - - /* check if page_index + page_count will wrap */ - if (-1UL - page_count < page_index) - return -EINVAL; - - if (page_index + page_count > kbase_reg_current_backed_size(reg)) - return -ENOMEM; - - if (reg->flags & KBASE_REG_DONT_NEED) - return -EINVAL; - - prot = PAGE_KERNEL; - if (!(reg->flags & KBASE_REG_CPU_CACHED)) { - /* Map uncached */ - prot = pgprot_writecombine(prot); - } - - page_array = kbase_get_cpu_phy_pages(reg); - if (!page_array) - return -ENOMEM; - - pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - for (i = 0; i < page_count; i++) - pages[i] = as_page(page_array[page_index + i]); - - /* Note: enforcing a RO prot_request onto prot is not done, since: - * - CPU-arch-specific integration required - * - kbase_vmap() requires no access checks to be made/enforced */ - - cpu_addr = vmap(pages, page_count, VM_MAP, prot); - - kfree(pages); - - if (!cpu_addr) - return -ENOMEM; - - map->offset_in_page = offset_in_page; - map->cpu_alloc = reg->cpu_alloc; - map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; - map->gpu_alloc = reg->gpu_alloc; - map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; - map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); - map->size = size; - map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) && - !kbase_mem_is_imported(map->gpu_alloc->type); - - if (map->sync_needed) - kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); - - return 0; -} - -void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, - unsigned long prot_request, struct kbase_vmap_struct *map) -{ - struct kbase_va_region *reg; - void *addr = NULL; - u64 offset_bytes; - struct kbase_mem_phy_alloc *cpu_alloc; - struct kbase_mem_phy_alloc *gpu_alloc; - int err; - - kbase_gpu_vm_lock(kctx); - - reg = kbase_region_tracker_find_region_enclosing_address(kctx, - gpu_addr); - if (!reg || (reg->flags & KBASE_REG_FREE)) - goto out_unlock; - - /* check access permissions can be satisfied - * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} - */ - if ((reg->flags & prot_request) != prot_request) - goto out_unlock; - - offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); - cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); - gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - - err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map); - if (err < 0) - goto fail_vmap_phy_pages; - - addr = map->addr; - -out_unlock: - kbase_gpu_vm_unlock(kctx); - return addr; - -fail_vmap_phy_pages: - kbase_gpu_vm_unlock(kctx); - kbase_mem_phy_alloc_put(cpu_alloc); - kbase_mem_phy_alloc_put(gpu_alloc); - - return NULL; -} - -void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map) -{ - /* 0 is specified for prot_request to indicate no access checks should - * be made. 
- * - * As mentioned in kbase_vmap_prot() this means that a kernel-side - * CPU-RO mapping is not enforced to allow this to work */ - return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); -} -KBASE_EXPORT_TEST_API(kbase_vmap); - -static void kbase_vunmap_phy_pages(struct kbase_context *kctx, - struct kbase_vmap_struct *map) -{ - void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); - vunmap(addr); - - if (map->sync_needed) - kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); - - map->offset_in_page = 0; - map->cpu_pages = NULL; - map->gpu_pages = NULL; - map->addr = NULL; - map->size = 0; - map->sync_needed = false; -} - -void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) -{ - kbase_vunmap_phy_pages(kctx, map); - map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); - map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); -} -KBASE_EXPORT_TEST_API(kbase_vunmap); - -void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) -{ - struct mm_struct *mm; - - rcu_read_lock(); - mm = rcu_dereference(kctx->process_mm); - if (mm) { - atomic_add(pages, &kctx->nonmapped_pages); -#ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_FILEPAGES, pages); -#else - spin_lock(&mm->page_table_lock); - add_mm_counter(mm, MM_FILEPAGES, pages); - spin_unlock(&mm->page_table_lock); -#endif - } - rcu_read_unlock(); -} - -static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) -{ - int pages; - struct mm_struct *mm; - - spin_lock(&kctx->mm_update_lock); - mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); - if (!mm) { - spin_unlock(&kctx->mm_update_lock); - return; - } - - rcu_assign_pointer(kctx->process_mm, NULL); - spin_unlock(&kctx->mm_update_lock); - synchronize_rcu(); - - pages = atomic_xchg(&kctx->nonmapped_pages, 0); -#ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_FILEPAGES, -pages); -#else - spin_lock(&mm->page_table_lock); - add_mm_counter(mm, MM_FILEPAGES, -pages); - spin_unlock(&mm->page_table_lock); -#endif -} - -static void kbase_special_vm_close(struct vm_area_struct *vma) -{ - struct kbase_context *kctx; - - kctx = vma->vm_private_data; - kbasep_os_process_page_usage_drain(kctx); -} - -static const struct vm_operations_struct kbase_vm_special_ops = { - .close = kbase_special_vm_close, -}; - -static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) -{ - /* check that this is the only tracking page */ - spin_lock(&kctx->mm_update_lock); - if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { - spin_unlock(&kctx->mm_update_lock); - return -EFAULT; - } - - rcu_assign_pointer(kctx->process_mm, current->mm); - - spin_unlock(&kctx->mm_update_lock); - - /* no real access */ - vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; -#else - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; -#endif - vma->vm_ops = &kbase_vm_special_ops; - vma->vm_private_data = kctx; - - return 0; -} - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_linux.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_linux.h deleted file mode 100755 index 5cb88d19426a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_linux.h +++ /dev/null @@ -1,443 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010, 2012-2018 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_mem_linux.h - * Base kernel memory APIs, Linux implementation. - */ - -#ifndef _KBASE_MEM_LINUX_H_ -#define _KBASE_MEM_LINUX_H_ - -/** A HWC dump mapping */ -struct kbase_hwc_dma_mapping { - void *cpu_va; - dma_addr_t dma_pa; - size_t size; -}; - -/** - * kbase_mem_alloc - Create a new allocation for GPU - * - * @kctx: The kernel context - * @va_pages: The number of pages of virtual address space to reserve - * @commit_pages: The number of physical pages to allocate upfront - * @extent: The number of extra pages to allocate on each GPU fault which - * grows the region. - * @flags: bitmask of BASE_MEM_* flags to convey special requirements & - * properties for the new allocation. - * @gpu_va: Start address of the memory region which was allocated from GPU - * virtual address space. - * - * Return: 0 on success or error code - */ -struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, - u64 *gpu_va); - -/** - * kbase_mem_query - Query properties of a GPU memory region - * - * @kctx: The kernel context - * @gpu_addr: A GPU address contained within the memory region - * @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be - * regarding the amount of backing physical memory allocated so far - * for the region or the size of the region or the flags associated - * with the region. - * @out: Pointer to the location to store the result of query. - * - * Return: 0 on success or error code - */ -int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, - u64 *const out); - -/** - * kbase_mem_import - Import the external memory for use by the GPU - * - * @kctx: The kernel context - * @type: Type of external memory - * @phandle: Handle to the external memory interpreted as per the type. - * @padding: Amount of extra VA pages to append to the imported buffer - * @gpu_va: GPU address assigned to the imported external memory - * @va_pages: Size of the memory region reserved from the GPU address space - * @flags: bitmask of BASE_MEM_* flags to convey special requirements & - * properties for the new allocation representing the external - * memory. - * Return: 0 on success or error code - */ -int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, - void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, - u64 *flags); - -/** - * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more - * memory regions - * - * @kctx: The kernel context - * @flags: bitmask of BASE_MEM_* flags. 
- * @stride: Bytes between start of each memory region - * @nents: The number of regions to pack together into the alias - * @ai: Pointer to the struct containing the memory aliasing info - * @num_pages: Number of pages the alias will cover - * - * Return: 0 on failure or otherwise the GPU VA for the alias - */ -u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); - -/** - * kbase_mem_flags_change - Change the flags for a memory region - * - * @kctx: The kernel context - * @gpu_addr: A GPU address contained within the memory region to modify. - * @flags: The new flags to set - * @mask: Mask of the flags, from BASE_MEM_*, to modify. - * - * Return: 0 on success or error code - */ -int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); - -/** - * kbase_mem_commit - Change the physical backing size of a region - * - * @kctx: The kernel context - * @gpu_addr: Handle to the memory region - * @new_pages: Number of physical pages to back the region with - * - * Return: 0 on success or error code - */ -int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); - -/** - * kbase_mmap - Mmap method, gets invoked when mmap system call is issued on - * device file /dev/malixx. - * @file: Pointer to the device file /dev/malixx instance. - * @vma: Pointer to the struct containing the info where the GPU allocation - * will be mapped in virtual address space of CPU. - * - * Return: 0 on success or error code - */ -int kbase_mmap(struct file *file, struct vm_area_struct *vma); - -/** - * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction - * mechanism. - * @kctx: The kbase context to initialize. - * - * Return: Zero on success or -errno on failure. - */ -int kbase_mem_evictable_init(struct kbase_context *kctx); - -/** - * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction - * mechanism. - * @kctx: The kbase context to de-initialize. - */ -void kbase_mem_evictable_deinit(struct kbase_context *kctx); - -/** - * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region - * @new_pages: The number of pages after the grow - * @old_pages: The number of pages before the grow - * - * Return: 0 on success, -errno on error. - * - * Expand the GPU mapping to encompass the new psychical pages which have - * been added to the allocation. - * - * Note: Caller must be holding the region lock. - */ -int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** - * kbase_mem_evictable_make - Make a physical allocation eligible for eviction - * @gpu_alloc: The physical allocation to make evictable - * - * Return: 0 on success, -errno on error. - * - * Take the provided region and make all the physical pages within it - * reclaimable by the kernel, updating the per-process VM stats as well. - * Remove any CPU mappings (as these can't be removed in the shrinker callback - * as mmap_sem might already be taken) but leave the GPU mapping intact as - * and until the shrinker reclaims the allocation. - * - * Note: Must be called with the region lock of the containing context. - */ -int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); - -/** - * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for - * eviction. 
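For orientation, a minimal sketch of how the allocation entry points declared above are typically driven (in the real driver this happens from the ioctl layer); the specific BASE_MEM_* and KBASE_MEM_QUERY_* constant names below are assumptions for illustration, not taken from this patch.

	/* Illustrative sketch: reserve 16 pages of GPU VA, back 4 of them up
	 * front, and let the region grow by 1 page per GPU fault. */
	u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
		    BASE_MEM_PROT_GPU_WR;		/* assumed flag names */
	u64 gpu_va, backed;
	struct kbase_va_region *reg =
		kbase_mem_alloc(kctx, 16, 4, 1, &flags, &gpu_va);

	if (reg) {
		/* KBASE_MEM_QUERY_COMMIT_SIZE: assumed name of the
		 * backing-size query described above. */
		kbase_mem_query(kctx, gpu_va, KBASE_MEM_QUERY_COMMIT_SIZE,
				&backed);
		kbase_mem_commit(kctx, gpu_va, 8);	/* grow backing to 8 pages */
	}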
- * @alloc: The physical allocation to remove eviction eligibility from. - * - * Return: True if the allocation had its backing restored and false if - * it hasn't. - * - * Make the physical pages in the region no longer reclaimable and update the - * per-process stats, if the shrinker has already evicted the memory then - * re-allocate it if the region is still alive. - * - * Note: Must be called with the region lock of the containing context. - */ -bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); - -struct kbase_vmap_struct { - off_t offset_in_page; - struct kbase_mem_phy_alloc *cpu_alloc; - struct kbase_mem_phy_alloc *gpu_alloc; - struct tagged_addr *cpu_pages; - struct tagged_addr *gpu_pages; - void *addr; - size_t size; - bool sync_needed; -}; - - -/** - * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the - * requested access permissions are supported - * @kctx: Context the VA range belongs to - * @gpu_addr: Start address of VA range - * @size: Size of VA range - * @prot_request: Flags indicating how the caller will then access the memory - * @map: Structure to be given to kbase_vunmap() on freeing - * - * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error - * - * Map a GPU VA Range into the kernel. The VA range must be contained within a - * GPU memory region. Appropriate CPU cache-flushing operations are made as - * required, dependent on the CPU mapping for the memory region. - * - * This is safer than using kmap() on the pages directly, - * because the pages here are refcounted to prevent freeing (and hence reuse - * elsewhere in the system) until an kbase_vunmap() - * - * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check - * whether the region should allow the intended access, and return an error if - * disallowed. This is essential for security of imported memory, particularly - * a user buf from SHM mapped into the process as RO. In that case, write - * access must be checked if the intention is for kernel to write to the - * memory. - * - * The checks are also there to help catch access errors on memory where - * security is not a concern: imported memory that is always RW, and memory - * that was allocated and owned by the process attached to @kctx. In this case, - * it helps to identify memory that was was mapped with the wrong access type. - * - * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases - * where either the security of memory is solely dependent on those flags, or - * when userspace code was expecting only the GPU to access the memory (e.g. HW - * workarounds). - * - * All cache maintenance operations shall be ignored if the - * memory region has been imported. - * - */ -void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, - unsigned long prot_request, struct kbase_vmap_struct *map); - -/** - * kbase_vmap - Map a GPU VA range into the kernel safely - * @kctx: Context the VA range belongs to - * @gpu_addr: Start address of VA range - * @size: Size of VA range - * @map: Structure to be given to kbase_vunmap() on freeing - * - * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error - * - * Map a GPU VA Range into the kernel. The VA range must be contained within a - * GPU memory region. Appropriate CPU cache-flushing operations are made as - * required, dependent on the CPU mapping for the memory region. 
- * - * This is safer than using kmap() on the pages directly, - * because the pages here are refcounted to prevent freeing (and hence reuse - * elsewhere in the system) until an kbase_vunmap() - * - * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no - * checks to ensure the security of e.g. imported user bufs from RO SHM. - * - * Note: All cache maintenance operations shall be ignored if the memory region - * has been imported. - */ -void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map); - -/** - * kbase_vunmap - Unmap a GPU VA range from the kernel - * @kctx: Context the VA range belongs to - * @map: Structure describing the mapping from the corresponding kbase_vmap() - * call - * - * Unmaps a GPU VA range from the kernel, given its @map structure obtained - * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as - * required, dependent on the CPU mapping for the memory region. - * - * The reference taken on pages during kbase_vmap() is released. - * - * Note: All cache maintenance operations shall be ignored if the memory region - * has been imported. - */ -void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); - -extern const struct vm_operations_struct kbase_vm_ops; - -/** - * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode - * CPU mapping. - * @kctx: Context the CPU mapping belongs to. - * @map: Structure describing the CPU mapping, setup previously by the - * kbase_vmap() call. - * @dest: Indicates the type of maintenance required (i.e. flush or invalidate) - * - * Note: The caller shall ensure that CPU mapping is not revoked & remains - * active whilst the maintenance is in progress. - */ -void kbase_sync_mem_regions(struct kbase_context *kctx, - struct kbase_vmap_struct *map, enum kbase_sync_type dest); - -/** - * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Shrink (or completely remove) all CPU mappings which reference the shrunk - * part of the allocation. - */ -void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** - * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region or NULL if there isn't one - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, negative -errno on error - * - * Unmap the shrunk pages from the GPU mapping. Note that the size of the region - * itself is unmodified as we still need to reserve the VA, only the page tables - * will be modified by this function. - */ -int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** - * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a - * physical allocation - * @kctx: The kernel base context associated with the mapping - * @alloc: Pointer to the allocation to terminate - * - * This function will unmap the kernel mapping, and free any structures used to - * track it. 
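To make the mapping contract above concrete, here is a minimal sketch of a kernel-side caller; the helper name and the single u32 access are illustrative, while the signatures and the KBASE_REG_CPU_WR check come from the declarations above.

	/* Illustrative sketch: write one u32 into GPU-visible memory, refusing
	 * to touch regions the CPU is not allowed to write (e.g. a user buffer
	 * imported read-only). Assumes the usual mali_kbase.h context. */
	static int example_poke_u32(struct kbase_context *kctx, u64 gpu_addr,
				    u32 value)
	{
		struct kbase_vmap_struct map;
		u32 *cpu_ptr;

		/* NULL if the region lacks KBASE_REG_CPU_WR. */
		cpu_ptr = kbase_vmap_prot(kctx, gpu_addr, sizeof(value),
					  KBASE_REG_CPU_WR, &map);
		if (!cpu_ptr)
			return -EPERM;

		*cpu_ptr = value;

		/* Flushes back to memory if the mapping was CPU-cached, then
		 * drops the page references taken by kbase_vmap_prot(). */
		kbase_vunmap(kctx, &map);
		return 0;
	}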
- */ -void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc); - -/** - * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent - * mapping of a physical allocation - * @kctx: The kernel base context @gpu_addr will be looked up in - * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping - * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer - * which will be used for a call to - * kbase_phy_alloc_mapping_put() - * - * Return: Pointer to a kernel-side accessible location that directly - * corresponds to @gpu_addr, or NULL on failure - * - * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access - * that location kernel-side. Only certain kinds of memory have a permanent - * kernel mapping, refer to the internal functions - * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more - * information. - * - * If this function succeeds, a CPU access to the returned pointer will access - * the actual location represented by @gpu_addr. That is, the return value does - * not require any offset added to it to access the location specified in - * @gpu_addr - * - * The client must take care to either apply any necessary sync operations when - * accessing the data, or ensure that the enclosing region was coherent with - * the GPU, or uncached in the CPU. - * - * The refcount on the physical allocations backing the region are taken, so - * that they do not disappear whilst the client is accessing it. Once the - * client has finished accessing the memory, it must be released with a call to - * kbase_phy_alloc_mapping_put() - * - * Whilst this is expected to execute quickly (the mapping was already setup - * when the physical allocation was created), the call is not IRQ-safe due to - * the region lookup involved. - * - * An error code may indicate that: - * - a userside process has freed the allocation, and so @gpu_addr is no longer - * valid - * - the region containing @gpu_addr does not support a permanent kernel mapping - */ -void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, - struct kbase_vmap_struct **out_kern_mapping); - -/** - * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a - * physical allocation - * @kctx: The kernel base context associated with the mapping - * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained - * from a call to kbase_phy_alloc_mapping_get() - * - * Releases the reference to the allocations backing @kern_mapping that was - * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used - * when the client no longer needs to access the kernel-side CPU pointer. - * - * If this was the last reference on the underlying physical allocations, they - * will go through the normal allocation free steps, which also includes an - * unmap of the permanent kernel mapping for those allocations. - * - * Due to these operations, the function is not IRQ-safe. However it is - * expected to execute quickly in the normal case, i.e. when the region holding - * the physical allocation is still present. - */ -void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, - struct kbase_vmap_struct *kern_mapping); - -/** - * kbase_get_cache_line_alignment - Return cache line alignment - * - * Helper function to return the maximum cache line alignment considering - * both CPU and GPU cache sizes. - * - * Return: CPU and GPU cache line alignment, in bytes. 
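The permanent-mapping accessors documented above differ from kbase_vmap() in that the kernel mapping already exists and only a reference is taken; a minimal sketch follows (the u32 access is a placeholder).

	/* Illustrative sketch: peek at memory that has a permanent kernel
	 * mapping; returns NULL for memory kinds without one. */
	struct kbase_vmap_struct *kern_mapping;
	u32 *ptr = kbase_phy_alloc_mapping_get(kctx, gpu_addr, &kern_mapping);

	if (ptr) {
		u32 first_word = READ_ONCE(*ptr); /* caller owns any sync needed */

		(void)first_word;
		kbase_phy_alloc_mapping_put(kctx, kern_mapping);
	}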
- * - * @kbdev: Device pointer. - */ -u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); - -#endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_lowlevel.h deleted file mode 100755 index 70116030f233..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_lowlevel.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#ifndef _KBASE_MEM_LOWLEVEL_H -#define _KBASE_MEM_LOWLEVEL_H - -#ifndef _KBASE_H_ -#error "Don't include this file directly, use mali_kbase.h instead" -#endif - -#include - -/** - * @brief Flags for kbase_phy_allocator_pages_alloc - */ -#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ -#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ -#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ - -#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) - -#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ - -enum kbase_sync_type { - KBASE_SYNC_TO_CPU, - KBASE_SYNC_TO_DEVICE -}; - -struct tagged_addr { phys_addr_t tagged_addr; }; - -#define HUGE_PAGE (1u << 0) -#define HUGE_HEAD (1u << 1) -#define FROM_PARTIAL (1u << 2) - -/* - * Note: if macro for converting physical address to page is not defined - * in the kernel itself, it is defined hereby. This is to avoid build errors - * which are reported during builds for some architectures. - */ -#ifndef phys_to_page -#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) -#endif - -/** - * as_phys_addr_t - Retrieve the physical address from tagged address by - * masking the lower order 12 bits. - * @t: tagged address to be translated. - * - * Return: physical address corresponding to tagged address. - */ -static inline phys_addr_t as_phys_addr_t(struct tagged_addr t) -{ - return t.tagged_addr & PAGE_MASK; -} - -/** - * as_page - Retrieve the struct page from a tagged address - * @t: tagged address to be translated. - * - * Return: pointer to struct page corresponding to tagged address. 
- */ -static inline struct page *as_page(struct tagged_addr t) -{ - return phys_to_page(as_phys_addr_t(t)); -} - -/** - * as_tagged - Convert the physical address to tagged address type though - * there is no tag info present, the lower order 12 bits will be 0 - * @phys: physical address to be converted to tagged type - * - * This is used for 4KB physical pages allocated by the Driver or imported pages - * and is needed as physical pages tracking object stores the reference for - * physical pages using tagged address type in lieu of the type generally used - * for physical addresses. - * - * Return: address of tagged address type. - */ -static inline struct tagged_addr as_tagged(phys_addr_t phys) -{ - struct tagged_addr t; - - t.tagged_addr = phys & PAGE_MASK; - return t; -} - -/** - * as_tagged_tag - Form the tagged address by storing the tag or metadata in the - * lower order 12 bits of physial address - * @phys: physical address to be converted to tagged address - * @tag: tag to be stored along with the physical address. - * - * The tag info is used while freeing up the pages - * - * Return: tagged address storing physical address & tag. - */ -static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag) -{ - struct tagged_addr t; - - t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK); - return t; -} - -/** - * is_huge - Check if the physical page is one of the 512 4KB pages of the - * large page which was not split to be used partially - * @t: tagged address storing the tag in the lower order bits. - * - * Return: true if page belongs to large page, or false - */ -static inline bool is_huge(struct tagged_addr t) -{ - return t.tagged_addr & HUGE_PAGE; -} - -/** - * is_huge_head - Check if the physical page is the first 4KB page of the - * 512 4KB pages within a large page which was not split - * to be used partially - * @t: tagged address storing the tag in the lower order bits. - * - * Return: true if page is the first page of a large page, or false - */ -static inline bool is_huge_head(struct tagged_addr t) -{ - int mask = HUGE_HEAD | HUGE_PAGE; - - return mask == (t.tagged_addr & mask); -} - -/** - * is_partial - Check if the physical page is one of the 512 pages of the - * large page which was split in 4KB pages to be used - * partially for allocations >= 2 MB in size. - * @t: tagged address storing the tag in the lower order bits. - * - * Return: true if page was taken from large page used partially, or false - */ -static inline bool is_partial(struct tagged_addr t) -{ - return t.tagged_addr & FROM_PARTIAL; -} - -#endif /* _KBASE_LOWLEVEL_H */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool.c deleted file mode 100755 index 0f91be17a81b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool.c +++ /dev/null @@ -1,842 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
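As a quick illustration of the mali_kbase_mem_lowlevel.h helpers above (a sketch with a made-up, page-aligned physical address):

	phys_addr_t phys = 0x80000000;	/* hypothetical address */

	/* Tag the head 4KB page of an unsplit 2MB (512-page) large page. */
	struct tagged_addr head = as_tagged_tag(phys, HUGE_HEAD | HUGE_PAGE);

	/* The tag lives in the low 12 bits, so the address is fully recoverable. */
	WARN_ON(as_phys_addr_t(head) != phys);
	WARN_ON(!is_huge(head) || !is_huge_head(head));

	/* An ordinary 4KB page carries no tag bits at all. */
	WARN_ON(is_huge(as_tagged(phys)) || is_partial(as_tagged(phys)));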
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define pool_dbg(pool, format, ...) \ - dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ - (pool->next_pool) ? "kctx" : "kbdev", \ - kbase_mem_pool_size(pool), \ - kbase_mem_pool_max_size(pool), \ - ##__VA_ARGS__) - -#define NOT_DIRTY false -#define NOT_RECLAIMED false - -static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) -{ - ssize_t max_size = kbase_mem_pool_max_size(pool); - ssize_t cur_size = kbase_mem_pool_size(pool); - - return max(max_size - cur_size, (ssize_t)0); -} - -static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) -{ - return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); -} - -static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) -{ - return kbase_mem_pool_size(pool) == 0; -} - -static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, - struct page *p) -{ - lockdep_assert_held(&pool->pool_lock); - - list_add(&p->lru, &pool->page_list); - pool->cur_size++; - - pool_dbg(pool, "added page\n"); -} - -static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) -{ - kbase_mem_pool_lock(pool); - kbase_mem_pool_add_locked(pool, p); - kbase_mem_pool_unlock(pool); -} - -static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, - struct list_head *page_list, size_t nr_pages) -{ - lockdep_assert_held(&pool->pool_lock); - - list_splice(page_list, &pool->page_list); - pool->cur_size += nr_pages; - - pool_dbg(pool, "added %zu pages\n", nr_pages); -} - -static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, - struct list_head *page_list, size_t nr_pages) -{ - kbase_mem_pool_lock(pool); - kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); - kbase_mem_pool_unlock(pool); -} - -static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) -{ - struct page *p; - - lockdep_assert_held(&pool->pool_lock); - - if (kbase_mem_pool_is_empty(pool)) - return NULL; - - p = list_first_entry(&pool->page_list, struct page, lru); - list_del_init(&p->lru); - pool->cur_size--; - - pool_dbg(pool, "removed page\n"); - - return p; -} - -static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) -{ - struct page *p; - - kbase_mem_pool_lock(pool); - p = kbase_mem_pool_remove_locked(pool); - kbase_mem_pool_unlock(pool); - - return p; -} - -static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, - struct page *p) -{ - struct device *dev = pool->kbdev->dev; - dma_sync_single_for_device(dev, kbase_dma_addr(p), - (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); -} - -static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, - struct page *p) -{ - int i; - - for (i = 0; i < (1U << pool->order); i++) - clear_highpage(p+i); - - kbase_mem_pool_sync_page(pool, p); -} - -static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, - struct page *p) -{ - /* Zero page before spilling */ - kbase_mem_pool_zero_page(next_pool, p); - - kbase_mem_pool_add(next_pool, p); -} - -struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) -{ - struct page *p; - gfp_t gfp; - struct device *dev = pool->kbdev->dev; - dma_addr_t dma_addr; - int i; - -#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 
0) - /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ - gfp = GFP_USER | __GFP_ZERO; -#else - gfp = GFP_HIGHUSER | __GFP_ZERO; -#endif - - /* don't warn on higer order failures */ - if (pool->order) - gfp |= __GFP_NOWARN; - - p = alloc_pages(gfp, pool->order); - if (!p) - return NULL; - - dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) { - __free_pages(p, pool->order); - return NULL; - } - - WARN_ON(dma_addr != page_to_phys(p)); - for (i = 0; i < (1u << pool->order); i++) - kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i); - - return p; -} - -static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, - struct page *p) -{ - struct device *dev = pool->kbdev->dev; - dma_addr_t dma_addr = kbase_dma_addr(p); - int i; - - dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order), - DMA_BIDIRECTIONAL); - for (i = 0; i < (1u << pool->order); i++) - kbase_clear_dma_addr(p+i); - __free_pages(p, pool->order); - - pool_dbg(pool, "freed page to kernel\n"); -} - -static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, - size_t nr_to_shrink) -{ - struct page *p; - size_t i; - - lockdep_assert_held(&pool->pool_lock); - - for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { - p = kbase_mem_pool_remove_locked(pool); - kbase_mem_pool_free_page(pool, p); - } - - return i; -} - -static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, - size_t nr_to_shrink) -{ - size_t nr_freed; - - kbase_mem_pool_lock(pool); - nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); - kbase_mem_pool_unlock(pool); - - return nr_freed; -} - -int kbase_mem_pool_grow(struct kbase_mem_pool *pool, - size_t nr_to_grow) -{ - struct page *p; - size_t i; - - kbase_mem_pool_lock(pool); - - pool->dont_reclaim = true; - for (i = 0; i < nr_to_grow; i++) { - if (pool->dying) { - pool->dont_reclaim = false; - kbase_mem_pool_shrink_locked(pool, nr_to_grow); - kbase_mem_pool_unlock(pool); - - return -ENOMEM; - } - kbase_mem_pool_unlock(pool); - - p = kbase_mem_alloc_page(pool); - if (!p) { - kbase_mem_pool_lock(pool); - pool->dont_reclaim = false; - kbase_mem_pool_unlock(pool); - - return -ENOMEM; - } - - kbase_mem_pool_lock(pool); - kbase_mem_pool_add_locked(pool, p); - } - pool->dont_reclaim = false; - kbase_mem_pool_unlock(pool); - - return 0; -} - -void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) -{ - size_t cur_size; - int err = 0; - - cur_size = kbase_mem_pool_size(pool); - - if (new_size > pool->max_size) - new_size = pool->max_size; - - if (new_size < cur_size) - kbase_mem_pool_shrink(pool, cur_size - new_size); - else if (new_size > cur_size) - err = kbase_mem_pool_grow(pool, new_size - cur_size); - - if (err) { - size_t grown_size = kbase_mem_pool_size(pool); - - dev_warn(pool->kbdev->dev, - "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", - (new_size - cur_size), (grown_size - cur_size)); - } -} - -void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) -{ - size_t cur_size; - size_t nr_to_shrink; - - kbase_mem_pool_lock(pool); - - pool->max_size = max_size; - - cur_size = kbase_mem_pool_size(pool); - if (max_size < cur_size) { - nr_to_shrink = cur_size - max_size; - kbase_mem_pool_shrink_locked(pool, nr_to_shrink); - } - - kbase_mem_pool_unlock(pool); -} - - -static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_mem_pool *pool; - 
size_t pool_size; - - pool = container_of(s, struct kbase_mem_pool, reclaim); - - kbase_mem_pool_lock(pool); - if (pool->dont_reclaim && !pool->dying) { - kbase_mem_pool_unlock(pool); - return 0; - } - pool_size = kbase_mem_pool_size(pool); - kbase_mem_pool_unlock(pool); - - return pool_size; -} - -static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_mem_pool *pool; - unsigned long freed; - - pool = container_of(s, struct kbase_mem_pool, reclaim); - - kbase_mem_pool_lock(pool); - if (pool->dont_reclaim && !pool->dying) { - kbase_mem_pool_unlock(pool); - return 0; - } - - pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); - - freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan); - - kbase_mem_pool_unlock(pool); - - pool_dbg(pool, "reclaim freed %ld pages\n", freed); - - return freed; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, - struct shrink_control *sc) -{ - if (sc->nr_to_scan == 0) - return kbase_mem_pool_reclaim_count_objects(s, sc); - - return kbase_mem_pool_reclaim_scan_objects(s, sc); -} -#endif - -int kbase_mem_pool_init(struct kbase_mem_pool *pool, - size_t max_size, - size_t order, - struct kbase_device *kbdev, - struct kbase_mem_pool *next_pool) -{ - pool->cur_size = 0; - pool->max_size = max_size; - pool->order = order; - pool->kbdev = kbdev; - pool->next_pool = next_pool; - pool->dying = false; - - spin_lock_init(&pool->pool_lock); - INIT_LIST_HEAD(&pool->page_list); - - /* Register shrinker */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) - pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; -#else - pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; - pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; -#endif - pool->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) - pool->reclaim.batch = 0; -#endif - register_shrinker(&pool->reclaim); - - pool_dbg(pool, "initialized\n"); - - return 0; -} - -void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) -{ - kbase_mem_pool_lock(pool); - pool->dying = true; - kbase_mem_pool_unlock(pool); -} - -void kbase_mem_pool_term(struct kbase_mem_pool *pool) -{ - struct kbase_mem_pool *next_pool = pool->next_pool; - struct page *p, *tmp; - size_t nr_to_spill = 0; - LIST_HEAD(spill_list); - LIST_HEAD(free_list); - int i; - - pool_dbg(pool, "terminate()\n"); - - unregister_shrinker(&pool->reclaim); - - kbase_mem_pool_lock(pool); - pool->max_size = 0; - - if (next_pool && !kbase_mem_pool_is_full(next_pool)) { - /* Spill to next pool (may overspill) */ - nr_to_spill = kbase_mem_pool_capacity(next_pool); - nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); - - /* Zero pages first without holding the next_pool lock */ - for (i = 0; i < nr_to_spill; i++) { - p = kbase_mem_pool_remove_locked(pool); - list_add(&p->lru, &spill_list); - } - } - - while (!kbase_mem_pool_is_empty(pool)) { - /* Free remaining pages to kernel */ - p = kbase_mem_pool_remove_locked(pool); - list_add(&p->lru, &free_list); - } - - kbase_mem_pool_unlock(pool); - - if (next_pool && nr_to_spill) { - list_for_each_entry(p, &spill_list, lru) - kbase_mem_pool_zero_page(pool, p); - - /* Add new page list to next_pool */ - kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); - - pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); - } - - 
list_for_each_entry_safe(p, tmp, &free_list, lru) { - list_del_init(&p->lru); - kbase_mem_pool_free_page(pool, p); - } - - pool_dbg(pool, "terminated\n"); -} - -struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) -{ - struct page *p; - - do { - pool_dbg(pool, "alloc()\n"); - p = kbase_mem_pool_remove(pool); - - if (p) - return p; - - pool = pool->next_pool; - } while (pool); - - return NULL; -} - -struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) -{ - struct page *p; - - lockdep_assert_held(&pool->pool_lock); - - pool_dbg(pool, "alloc_locked()\n"); - p = kbase_mem_pool_remove_locked(pool); - - if (p) - return p; - - return NULL; -} - -void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, - bool dirty) -{ - struct kbase_mem_pool *next_pool = pool->next_pool; - - pool_dbg(pool, "free()\n"); - - if (!kbase_mem_pool_is_full(pool)) { - /* Add to our own pool */ - if (dirty) - kbase_mem_pool_sync_page(pool, p); - - kbase_mem_pool_add(pool, p); - } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { - /* Spill to next pool */ - kbase_mem_pool_spill(next_pool, p); - } else { - /* Free page */ - kbase_mem_pool_free_page(pool, p); - } -} - -void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, - bool dirty) -{ - pool_dbg(pool, "free_locked()\n"); - - lockdep_assert_held(&pool->pool_lock); - - if (!kbase_mem_pool_is_full(pool)) { - /* Add to our own pool */ - if (dirty) - kbase_mem_pool_sync_page(pool, p); - - kbase_mem_pool_add_locked(pool, p); - } else { - /* Free page */ - kbase_mem_pool_free_page(pool, p); - } -} - -int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, - struct tagged_addr *pages, bool partial_allowed) -{ - struct page *p; - size_t nr_from_pool; - size_t i = 0; - int err = -ENOMEM; - size_t nr_pages_internal; - - nr_pages_internal = nr_4k_pages / (1u << (pool->order)); - - if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) - return -EINVAL; - - pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages); - pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal); - - /* Get pages from this pool */ - kbase_mem_pool_lock(pool); - nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); - while (nr_from_pool--) { - int j; - p = kbase_mem_pool_remove_locked(pool); - if (pool->order) { - pages[i++] = as_tagged_tag(page_to_phys(p), - HUGE_HEAD | HUGE_PAGE); - for (j = 1; j < (1u << pool->order); j++) - pages[i++] = as_tagged_tag(page_to_phys(p) + - PAGE_SIZE * j, - HUGE_PAGE); - } else { - pages[i++] = as_tagged(page_to_phys(p)); - } - } - kbase_mem_pool_unlock(pool); - - if (i != nr_4k_pages && pool->next_pool) { - /* Allocate via next pool */ - err = kbase_mem_pool_alloc_pages(pool->next_pool, - nr_4k_pages - i, pages + i, partial_allowed); - - if (err < 0) - goto err_rollback; - - i += err; - } else { - /* Get any remaining pages from kernel */ - while (i != nr_4k_pages) { - p = kbase_mem_alloc_page(pool); - if (!p) { - if (partial_allowed) - goto done; - else - goto err_rollback; - } - - if (pool->order) { - int j; - - pages[i++] = as_tagged_tag(page_to_phys(p), - HUGE_PAGE | - HUGE_HEAD); - for (j = 1; j < (1u << pool->order); j++) { - phys_addr_t phys; - - phys = page_to_phys(p) + PAGE_SIZE * j; - pages[i++] = as_tagged_tag(phys, - HUGE_PAGE); - } - } else { - pages[i++] = as_tagged(page_to_phys(p)); - } - } - } - -done: - pool_dbg(pool, "alloc_pages(%zu) done\n", i); - return i; - -err_rollback: - kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, 
NOT_RECLAIMED); - return err; -} - -int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, - size_t nr_4k_pages, struct tagged_addr *pages) -{ - struct page *p; - size_t i; - size_t nr_pages_internal; - - lockdep_assert_held(&pool->pool_lock); - - nr_pages_internal = nr_4k_pages / (1u << (pool->order)); - - if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) - return -EINVAL; - - pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); - pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", - nr_pages_internal); - - if (kbase_mem_pool_size(pool) < nr_pages_internal) { - pool_dbg(pool, "Failed alloc\n"); - return -ENOMEM; - } - - for (i = 0; i < nr_pages_internal; i++) { - int j; - - p = kbase_mem_pool_remove_locked(pool); - if (pool->order) { - *pages++ = as_tagged_tag(page_to_phys(p), - HUGE_HEAD | HUGE_PAGE); - for (j = 1; j < (1u << pool->order); j++) { - *pages++ = as_tagged_tag(page_to_phys(p) + - PAGE_SIZE * j, - HUGE_PAGE); - } - } else { - *pages++ = as_tagged(page_to_phys(p)); - } - } - - return nr_4k_pages; -} - -static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, - bool zero, bool sync) -{ - struct page *p; - size_t nr_to_pool = 0; - LIST_HEAD(new_page_list); - size_t i; - - if (!nr_pages) - return; - - pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", - nr_pages, zero, sync); - - /* Zero/sync pages first without holding the pool lock */ - for (i = 0; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) - continue; - - if (is_huge_head(pages[i]) || !is_huge(pages[i])) { - p = as_page(pages[i]); - if (zero) - kbase_mem_pool_zero_page(pool, p); - else if (sync) - kbase_mem_pool_sync_page(pool, p); - - list_add(&p->lru, &new_page_list); - nr_to_pool++; - } - pages[i] = as_tagged(0); - } - - /* Add new page list to pool */ - kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); - - pool_dbg(pool, "add_array(%zu) added %zu pages\n", - nr_pages, nr_to_pool); -} - -static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, - bool zero, bool sync) -{ - struct page *p; - size_t nr_to_pool = 0; - LIST_HEAD(new_page_list); - size_t i; - - lockdep_assert_held(&pool->pool_lock); - - if (!nr_pages) - return; - - pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", - nr_pages, zero, sync); - - /* Zero/sync pages first */ - for (i = 0; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) - continue; - - if (is_huge_head(pages[i]) || !is_huge(pages[i])) { - p = as_page(pages[i]); - if (zero) - kbase_mem_pool_zero_page(pool, p); - else if (sync) - kbase_mem_pool_sync_page(pool, p); - - list_add(&p->lru, &new_page_list); - nr_to_pool++; - } - pages[i] = as_tagged(0); - } - - /* Add new page list to pool */ - kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); - - pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", - nr_pages, nr_to_pool); -} - -void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - struct tagged_addr *pages, bool dirty, bool reclaimed) -{ - struct kbase_mem_pool *next_pool = pool->next_pool; - struct page *p; - size_t nr_to_pool; - LIST_HEAD(to_pool_list); - size_t i = 0; - - pool_dbg(pool, "free_pages(%zu):\n", nr_pages); - - if (!reclaimed) { - /* Add to this pool */ - nr_to_pool = kbase_mem_pool_capacity(pool); - nr_to_pool = min(nr_pages, nr_to_pool); - - kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); - - i += nr_to_pool; - - if (i != 
nr_pages && next_pool) { - /* Spill to next pool (may overspill) */ - nr_to_pool = kbase_mem_pool_capacity(next_pool); - nr_to_pool = min(nr_pages - i, nr_to_pool); - - kbase_mem_pool_add_array(next_pool, nr_to_pool, - pages + i, true, dirty); - i += nr_to_pool; - } - } - - /* Free any remaining pages to kernel */ - for (; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) - continue; - - if (is_huge(pages[i]) && !is_huge_head(pages[i])) { - pages[i] = as_tagged(0); - continue; - } - - p = as_page(pages[i]); - - kbase_mem_pool_free_page(pool, p); - pages[i] = as_tagged(0); - } - - pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); -} - - -void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, - size_t nr_pages, struct tagged_addr *pages, bool dirty, - bool reclaimed) -{ - struct page *p; - size_t nr_to_pool; - LIST_HEAD(to_pool_list); - size_t i = 0; - - lockdep_assert_held(&pool->pool_lock); - - pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages); - - if (!reclaimed) { - /* Add to this pool */ - nr_to_pool = kbase_mem_pool_capacity(pool); - nr_to_pool = min(nr_pages, nr_to_pool); - - kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, - dirty); - - i += nr_to_pool; - } - - /* Free any remaining pages to kernel */ - for (; i < nr_pages; i++) { - if (unlikely(!as_phys_addr_t(pages[i]))) - continue; - - if (is_huge(pages[i]) && !is_huge_head(pages[i])) { - pages[i] = as_tagged(0); - continue; - } - - p = as_page(pages[i]); - - kbase_mem_pool_free_page(pool, p); - pages[i] = as_tagged(0); - } - - pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool_debugfs.c deleted file mode 100755 index 4b4eeb32d2c1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool_debugfs.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
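The page pool API deleted above chains pools via next_pool, so a context-level pool can spill into a device-level pool instead of returning pages straight to the kernel. A compact usage sketch (the pool sizes and the standalone dev_pool are illustrative assumptions):

	struct kbase_mem_pool dev_pool, ctx_pool;

	kbase_mem_pool_init(&dev_pool, 4096 /* max_size */, 0 /* order */,
			    kbdev, NULL);
	kbase_mem_pool_init(&ctx_pool, 256, 0, kbdev, &dev_pool);

	/* alloc() walks the chain and returns NULL once every pool is empty;
	 * it never falls back to the kernel allocator itself. */
	struct page *p = kbase_mem_pool_alloc(&ctx_pool);
	if (p)
		kbase_mem_pool_free(&ctx_pool, p, false /* dirty */);

	/* Termination: tell in-flight growers to back off, then spill what
	 * fits into dev_pool and free the remainder to the kernel. */
	kbase_mem_pool_mark_dying(&ctx_pool);
	kbase_mem_pool_term(&ctx_pool);
	kbase_mem_pool_term(&dev_pool);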
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include - -#include - -#ifdef CONFIG_DEBUG_FS - -static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val) -{ - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; - - *val = kbase_mem_pool_size(pool); - - return 0; -} - -static int kbase_mem_pool_debugfs_size_set(void *data, u64 val) -{ - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; - - kbase_mem_pool_trim(pool, val); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops, - kbase_mem_pool_debugfs_size_get, - kbase_mem_pool_debugfs_size_set, - "%llu\n"); - -static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val) -{ - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; - - *val = kbase_mem_pool_max_size(pool); - - return 0; -} - -static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val) -{ - struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data; - - kbase_mem_pool_set_max_size(pool, val); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops, - kbase_mem_pool_debugfs_max_size_get, - kbase_mem_pool_debugfs_max_size_set, - "%llu\n"); - -void kbase_mem_pool_debugfs_init(struct dentry *parent, - struct kbase_mem_pool *pool, - struct kbase_mem_pool *lp_pool) -{ - debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent, - pool, &kbase_mem_pool_debugfs_size_fops); - - debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent, - pool, &kbase_mem_pool_debugfs_max_size_fops); - - debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent, - lp_pool, &kbase_mem_pool_debugfs_size_fops); - - debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent, - lp_pool, &kbase_mem_pool_debugfs_max_size_fops); -} - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool_debugfs.h deleted file mode 100755 index 990d91c8fbe7..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_pool_debugfs.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KBASE_MEM_POOL_DEBUGFS_H -#define _KBASE_MEM_POOL_DEBUGFS_H - -#include - -/** - * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool - * @parent: Parent debugfs dentry - * @pool: Memory pool of small pages to control - * @lp_pool: Memory pool of large pages to control - * - * Adds four debugfs files under @parent: - * - mem_pool_size: get/set the current size of @pool - * - mem_pool_max_size: get/set the max size of @pool - * - lp_mem_pool_size: get/set the current size of @lp_pool - * - lp_mem_pool_max_size: get/set the max size of @lp_pool - */ -void kbase_mem_pool_debugfs_init(struct dentry *parent, - struct kbase_mem_pool *pool, - struct kbase_mem_pool *lp_pool); - -#endif /*_KBASE_MEM_POOL_DEBUGFS_H*/ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs.c deleted file mode 100755 index d4f8433f4087..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include - -#ifdef CONFIG_DEBUG_FS - -/** Show callback for the @c mem_profile debugfs file. - * - * This function is called to get the contents of the @c mem_profile debugfs - * file. This is a report of current memory usage and distribution in userspace. 
- * - * @param sfile The debugfs entry - * @param data Data associated with the entry - * - * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise - */ -static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) -{ - struct kbase_context *kctx = sfile->private; - - mutex_lock(&kctx->mem_profile_lock); - - seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); - - seq_putc(sfile, '\n'); - - mutex_unlock(&kctx->mem_profile_lock); - - return 0; -} - -/* - * File operations related to debugfs entry for mem_profile - */ -static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) -{ - return single_open(file, kbasep_mem_profile_seq_show, in->i_private); -} - -static const struct file_operations kbasep_mem_profile_debugfs_fops = { - .open = kbasep_mem_profile_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) -{ - int err = 0; - - mutex_lock(&kctx->mem_profile_lock); - - dev_dbg(kctx->kbdev->dev, "initialised: %d", - kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); - - if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { - if (!debugfs_create_file("mem_profile", S_IRUGO, - kctx->kctx_dentry, kctx, - &kbasep_mem_profile_debugfs_fops)) { - err = -EAGAIN; - } else { - kbase_ctx_flag_set(kctx, - KCTX_MEM_PROFILE_INITIALIZED); - } - } - - if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { - kfree(kctx->mem_profile_data); - kctx->mem_profile_data = data; - kctx->mem_profile_size = size; - } else { - kfree(data); - } - - dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", - err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); - - mutex_unlock(&kctx->mem_profile_lock); - - return err; -} - -void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) -{ - mutex_lock(&kctx->mem_profile_lock); - - dev_dbg(kctx->kbdev->dev, "initialised: %d", - kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); - - kfree(kctx->mem_profile_data); - kctx->mem_profile_data = NULL; - kctx->mem_profile_size = 0; - - mutex_unlock(&kctx->mem_profile_lock); -} - -#else /* CONFIG_DEBUG_FS */ - -int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) -{ - kfree(data); - return 0; -} -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs.h deleted file mode 100755 index 1462247c3bca..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_mem_profile_debugfs.h - * Header file for mem profiles entries in debugfs - * - */ - -#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H -#define _KBASE_MEM_PROFILE_DEBUGFS_H - -#include -#include - -/** - * @brief Remove entry from Mali memory profile debugfs - */ -void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); - -/** - * @brief Insert @p data to the debugfs file so it can be read by userspace - * - * The function takes ownership of @p data and frees it later when new data - * is inserted. - * - * If the debugfs entry corresponding to the @p kctx doesn't exist, - * an attempt will be made to create it. - * - * @param kctx The context whose debugfs file @p data should be inserted to - * @param data A NULL-terminated string to be inserted to the debugfs file, - * without the trailing new line character - * @param size The length of the @p data string - * @return 0 if @p data inserted correctly - * -EAGAIN in case of error - * @post @ref mem_profile_initialized will be set to @c true - * the first time this function succeeds. - */ -int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size); - -#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs_buf_size.h deleted file mode 100755 index 3c760717eef4..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file mali_kbase_mem_profile_debugfs_buf_size.h - * Header file for the size of the buffer to accumulate the histogram report text in - */ - -#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ -#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ - -/** - * The size of the buffer to accumulate the histogram report text in - * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT - */ -#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \ - ((size_t) (64 + ((80 + (56 * 64)) * 35) + 56)) - -#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu.c deleted file mode 100755 index 84341ca18569..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu.c +++ /dev/null @@ -1,2658 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
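A short sketch of how the mem_profile debugfs interface above is fed: the report text is a placeholder, and kbasep_mem_profile_debugfs_insert() takes ownership of the buffer on both the success and the error path.

	char *buf = kmalloc(KBASE_MEM_PROFILE_MAX_BUF_SIZE, GFP_KERNEL);

	if (buf) {
		int len = scnprintf(buf, KBASE_MEM_PROFILE_MAX_BUF_SIZE,
				    "heap,%u", 0u /* placeholder value */);

		/* On failure (-EAGAIN) the helper has already freed buf. */
		if (kbasep_mem_profile_debugfs_insert(kctx, buf, len))
			dev_dbg(kctx->kbdev->dev, "mem_profile not published\n");
	}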
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_mmu.c - * Base kernel MMU management. - */ - -/* #define DEBUG 1 */ -#include -#include -#include -#include -#if defined(CONFIG_MALI_GATOR_SUPPORT) -#include -#endif -#include -#include -#include - -#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) - -#include -#include -#include -#include -#include -#include - -#define KBASE_MMU_PAGE_ENTRIES 512 - -/** - * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. - * - * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. - * - * If sync is not set then transactions still in flight when the flush is issued - * may use the old page tables and the data they write will not be written out - * to memory, this function returns after the flush has been issued but - * before all accesses which might effect the flushed region have completed. - * - * If sync is set then accesses in the flushed region will be drained - * before data is flush and invalidated through L1, L2 and into memory, - * after which point this function will return. - */ -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync); - -/** - * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches. - * @kbdev: Device pointer. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. - * @as_nr: GPU address space number for which flush + invalidate is required. - * - * This is used for MMU tables which do not belong to a user space context. - */ -static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, - u64 vpfn, size_t nr, bool sync, int as_nr); - -/** - * kbase_mmu_sync_pgd - sync page directory to memory - * @kbdev: Device pointer. - * @handle: Address of DMA region. - * @size: Size of the region to sync. - * - * This should be called after each page directory update. - */ - -static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, - dma_addr_t handle, size_t size) -{ - /* If page table is not coherent then ensure the gpu can read - * the pages from memory - */ - if (kbdev->system_coherency != COHERENCY_ACE) - dma_sync_single_for_device(kbdev->dev, handle, size, - DMA_TO_DEVICE); -} - -/* - * Definitions: - * - PGD: Page Directory. - * - PTE: Page Table Entry. A 64bit value pointing to the next - * level of translation - * - ATE: Address Transation Entry. A 64bit value pointing to - * a 4kB physical page. 
- */ - -static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault); - -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags); - -/** - * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to - * a region on a GPU page fault - * - * @reg: The region that will be backed with more pages - * @fault_rel_pfn: PFN of the fault relative to the start of the region - * - * This calculates how much to increase the backing of a region by, based on - * where a GPU page fault occurred and the flags in the region. - * - * This can be more than the minimum number of pages that would reach - * @fault_rel_pfn, for example to reduce the overall rate of page fault - * interrupts on a region, or to ensure that the end address is aligned. - * - * Return: the number of backed pages to increase by - */ -static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, - struct kbase_va_region *reg, size_t fault_rel_pfn) -{ - size_t multiple = reg->extent; - size_t reg_current_size = kbase_reg_current_backed_size(reg); - size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; - size_t remainder; - - if (!multiple) { - dev_warn(kbdev->dev, - "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", - ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); - return minimum_extra; - } - - /* Calculate the remainder to subtract from minimum_extra to make it - * the desired (rounded down) multiple of the extent. - * Depending on reg's flags, the base used for calculating multiples is - * different */ - if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { - /* multiple is based from the top of the initial commit, which - * has been allocated in such a way that (start_pfn + - * initial_commit) is already aligned to multiple. 
Hence the - * pfn for the end of committed memory will also be aligned to - * multiple */ - size_t initial_commit = reg->initial_commit; - - if (fault_rel_pfn < initial_commit) { - /* this case is just to catch in case it's been - * recommitted by userspace to be smaller than the - * initial commit */ - minimum_extra = initial_commit - reg_current_size; - remainder = 0; - } else { - /* same as calculating (fault_rel_pfn - initial_commit + 1) */ - size_t pages_after_initial = minimum_extra + reg_current_size - initial_commit; - - remainder = pages_after_initial % multiple; - } - } else { - /* multiple is based from the current backed size, even if the - * current backed size/pfn for end of committed memory are not - * themselves aligned to multiple */ - remainder = minimum_extra % multiple; - } - - if (remainder == 0) - return minimum_extra; - - return minimum_extra + multiple - remainder; -} - -#ifdef CONFIG_MALI_CINSTR_GWT -static void kbase_gpu_mmu_handle_write_faulting_as( - struct kbase_device *kbdev, - struct kbase_as *faulting_as, - u64 start_pfn, size_t nr, u32 op) -{ - mutex_lock(&kbdev->mmu_hw_mutex); - - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn, - nr, op, 1); - - mutex_unlock(&kbdev->mmu_hw_mutex); - - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); -} - -static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, - struct kbase_as *faulting_as) -{ - struct kbasep_gwt_list_element *pos; - struct kbase_va_region *region; - struct kbase_device *kbdev; - struct kbase_fault *fault; - u64 fault_pfn, pfn_offset; - u32 op; - int ret; - int as_no; - - as_no = faulting_as->number; - kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - fault = &faulting_as->pf_data; - fault_pfn = fault->addr >> PAGE_SHIFT; - - kbase_gpu_vm_lock(kctx); - - /* Find region and check if it should be writable. */ - region = kbase_region_tracker_find_region_enclosing_address(kctx, - fault->addr); - if (!region || region->flags & KBASE_REG_FREE) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not mapped on the GPU", - &faulting_as->pf_data); - return; - } - - if (!(region->flags & KBASE_REG_GPU_WR)) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Region does not have write permissions", - &faulting_as->pf_data); - return; - } - - /* Capture addresses of faulting write location - * for job dumping if write tracking is enabled. - */ - if (kctx->gwt_enabled) { - u64 page_addr = fault->addr & PAGE_MASK; - bool found = false; - /* Check if this write was already handled. */ - list_for_each_entry(pos, &kctx->gwt_current_list, link) { - if (page_addr == pos->page_addr) { - found = true; - break; - } - } - - if (!found) { - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (pos) { - pos->region = region; - pos->page_addr = page_addr; - pos->num_pages = 1; - list_add(&pos->link, &kctx->gwt_current_list); - } else { - dev_warn(kbdev->dev, "kmalloc failure"); - } - } - } - - pfn_offset = fault_pfn - region->start_pfn; - /* Now make this faulting page writable to GPU. 
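
/*
 * Illustrative sketch, not part of the removed kbase sources: the growth
 * calculation in reg_grow_calc_extra_pages() above reduces to "round
 * minimum_extra up to the next multiple of the region extent" (the
 * KBASE_REG_TILER_ALIGN_TOP branch only changes which base the multiple is
 * counted from). The helper name below is hypothetical and assumes plain
 * size_t arithmetic.
 */
#include <stddef.h>

static size_t round_growth_to_extent(size_t minimum_extra, size_t multiple)
{
	size_t remainder;

	if (!multiple)		/* extent of 0: no alignment requested */
		return minimum_extra;

	remainder = minimum_extra % multiple;
	if (remainder == 0)	/* already a whole number of extents */
		return minimum_extra;

	return minimum_extra + multiple - remainder;
}
/* e.g. round_growth_to_extent(5, 64) == 64, as in the non-aligned-top case. */
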
*/ - ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - 1, region->flags); - - /* flush L2 and unlock the VA (resumes the MMU) */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) - op = AS_COMMAND_FLUSH; - else - op = AS_COMMAND_FLUSH_PT; - - kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, - fault_pfn, 1, op); - - kbase_gpu_vm_unlock(kctx); -} - -static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, - struct kbase_as *faulting_as) -{ - struct kbase_fault *fault = &faulting_as->pf_data; - - switch (fault->status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { - case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: - case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: - kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); - break; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Execute Permission fault", fault); - break; - case AS_FAULTSTATUS_ACCESS_TYPE_READ: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Read Permission fault", fault); - break; - default: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown Permission fault", fault); - break; - } -} -#endif - -#define MAX_POOL_LEVEL 2 - -/** - * page_fault_try_alloc - Try to allocate memory from a context pool - * @kctx: Context pointer - * @region: Region to grow - * @new_pages: Number of 4 kB pages to allocate - * @pages_to_grow: Pointer to variable to store number of outstanding pages on - * failure. This can be either 4 kB or 2 MB pages, depending on - * the number of pages requested. - * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true - * for 2 MB, false for 4 kB. - * @prealloc_sas: Pointer to kbase_sub_alloc structures - * - * This function will try to allocate as many pages as possible from the context - * pool, then if required will try to allocate the remaining pages from the - * device pool. - * - * This function will not allocate any new memory beyond that that is already - * present in the context or device pools. This is because it is intended to be - * called with the vm_lock held, which could cause recursive locking if the - * allocation caused the out-of-memory killer to run. - * - * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be - * a count of 2 MB pages, otherwise it will be a count of 4 kB pages. - * - * Return: true if successful, false on failure - */ -static bool page_fault_try_alloc(struct kbase_context *kctx, - struct kbase_va_region *region, size_t new_pages, - int *pages_to_grow, bool *grow_2mb_pool, - struct kbase_sub_alloc **prealloc_sas) -{ - struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL}; - struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL}; - size_t pages_alloced[MAX_POOL_LEVEL] = {0}; - struct kbase_mem_pool *pool, *root_pool; - int pool_level = 0; - bool alloc_failed = false; - size_t pages_still_required; - -#ifdef CONFIG_MALI_2MB_ALLOC - if (new_pages >= (SZ_2M / SZ_4K)) { - root_pool = &kctx->lp_mem_pool; - *grow_2mb_pool = true; - } else { -#endif - root_pool = &kctx->mem_pool; - *grow_2mb_pool = false; -#ifdef CONFIG_MALI_2MB_ALLOC - } -#endif - - if (region->gpu_alloc != region->cpu_alloc) - new_pages *= 2; - - pages_still_required = new_pages; - - /* Determine how many pages are in the pools before trying to allocate. - * Don't attempt to allocate & free if the allocation can't succeed. 
- */ - for (pool = root_pool; pool != NULL; pool = pool->next_pool) { - size_t pool_size_4k; - - kbase_mem_pool_lock(pool); - - pool_size_4k = kbase_mem_pool_size(pool) << pool->order; - if (pool_size_4k >= pages_still_required) - pages_still_required = 0; - else - pages_still_required -= pool_size_4k; - - kbase_mem_pool_unlock(pool); - - if (!pages_still_required) - break; - } - - if (pages_still_required) { - /* Insufficient pages in pools. Don't try to allocate - just - * request a grow. - */ - *pages_to_grow = pages_still_required; - - return false; - } - - /* Since we've dropped the pool locks, the amount of memory in the pools - * may change between the above check and the actual allocation. - */ - pool = root_pool; - for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) { - size_t pool_size_4k; - size_t pages_to_alloc_4k; - size_t pages_to_alloc_4k_per_alloc; - - kbase_mem_pool_lock(pool); - - /* Allocate as much as possible from this pool*/ - pool_size_4k = kbase_mem_pool_size(pool) << pool->order; - pages_to_alloc_4k = MIN(new_pages, pool_size_4k); - if (region->gpu_alloc == region->cpu_alloc) - pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; - else - pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; - - pages_alloced[pool_level] = pages_to_alloc_4k; - if (pages_to_alloc_4k) { - gpu_pages[pool_level] = - kbase_alloc_phy_pages_helper_locked( - region->gpu_alloc, pool, - pages_to_alloc_4k_per_alloc, - &prealloc_sas[0]); - - if (!gpu_pages[pool_level]) { - alloc_failed = true; - } else if (region->gpu_alloc != region->cpu_alloc) { - cpu_pages[pool_level] = - kbase_alloc_phy_pages_helper_locked( - region->cpu_alloc, pool, - pages_to_alloc_4k_per_alloc, - &prealloc_sas[1]); - - if (!cpu_pages[pool_level]) - alloc_failed = true; - } - } - - kbase_mem_pool_unlock(pool); - - if (alloc_failed) { - WARN_ON(!new_pages); - WARN_ON(pages_to_alloc_4k >= new_pages); - WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages); - break; - } - - new_pages -= pages_to_alloc_4k; - - if (!new_pages) - break; - - pool = pool->next_pool; - if (!pool) - break; - } - - if (new_pages) { - /* Allocation was unsuccessful */ - int max_pool_level = pool_level; - - pool = root_pool; - - /* Free memory allocated so far */ - for (pool_level = 0; pool_level <= max_pool_level; - pool_level++) { - kbase_mem_pool_lock(pool); - - if (region->gpu_alloc != region->cpu_alloc) { - if (pages_alloced[pool_level] && - cpu_pages[pool_level]) - kbase_free_phy_pages_helper_locked( - region->cpu_alloc, - pool, cpu_pages[pool_level], - pages_alloced[pool_level]); - } - - if (pages_alloced[pool_level] && gpu_pages[pool_level]) - kbase_free_phy_pages_helper_locked( - region->gpu_alloc, - pool, gpu_pages[pool_level], - pages_alloced[pool_level]); - - kbase_mem_pool_unlock(pool); - - pool = pool->next_pool; - } - - /* - * If the allocation failed despite there being enough memory in - * the pool, then just fail. Otherwise, try to grow the memory - * pool. - */ - if (alloc_failed) - *pages_to_grow = 0; - else - *pages_to_grow = new_pages; - - return false; - } - - /* Allocation was successful. No pages to grow, return success. 
*/ - *pages_to_grow = 0; - - return true; -} - -void page_fault_worker(struct work_struct *data) -{ - u64 fault_pfn; - u32 fault_status; - size_t new_pages; - size_t fault_rel_pfn; - struct kbase_as *faulting_as; - int as_no; - struct kbase_context *kctx; - struct kbase_device *kbdev; - struct kbase_va_region *region; - struct kbase_fault *fault; - int err; - bool grown = false; - int pages_to_grow; - bool grow_2mb_pool; - struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; - int i; - - faulting_as = container_of(data, struct kbase_as, work_pagefault); - fault = &faulting_as->pf_data; - fault_pfn = fault->addr >> PAGE_SHIFT; - as_no = faulting_as->number; - - kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - - /* Grab the context that was already refcounted in kbase_mmu_interrupt(). - * Therefore, it cannot be scheduled out of this AS until we explicitly release it - */ - kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); - if (WARN_ON(!kctx)) { - atomic_dec(&kbdev->faults_pending); - return; - } - - KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); - - if (unlikely(fault->protected_mode)) { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Protected mode fault", fault); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - - goto fault_done; - } - - fault_status = fault->status; - switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { - - case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: - /* need to check against the region to handle this one */ - break; - - case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: -#ifdef CONFIG_MALI_CINSTR_GWT - /* If GWT was ever enabled then we need to handle - * write fault pages even if the feature was disabled later. - */ - if (kctx->gwt_was_enabled) { - kbase_gpu_mmu_handle_permission_fault(kctx, - faulting_as); - goto fault_done; - } -#endif - - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure", fault); - goto fault_done; - - case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Translation table bus fault", fault); - goto fault_done; - - case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: - /* nothing to do, but we don't expect this fault currently */ - dev_warn(kbdev->dev, "Access flag unexpectedly set"); - goto fault_done; - - case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); - goto fault_done; - - case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); - goto fault_done; - - default: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); - goto fault_done; - } - -#ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs if necessary */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); - if (!prealloc_sas[i]) { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Failed pre-allocating memory for sub-allocations' metadata", - fault); - goto fault_done; - } - } -#endif /* CONFIG_MALI_2MB_ALLOC */ 
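
/*
 * Illustrative sketch, not part of the removed kbase sources: the pre-check
 * in page_fault_try_alloc() above walks the memory pool chain and converts
 * each pool's size to 4 kB units with "<< order" (order 0 for the 4 kB pool,
 * 9 for the 2 MB pool) before deciding whether a pool grow is needed instead
 * of an allocation. The struct and demo_* names below are hypothetical.
 */
#include <stdbool.h>
#include <stddef.h>

struct demo_pool {
	size_t size;			/* pages currently held, in pool-sized units */
	unsigned int order;		/* log2(4 kB pages per unit) */
	struct demo_pool *next_pool;
};

/*
 * Returns true if the chain already holds enough 4 kB pages for the request;
 * otherwise reports the shortfall (in 4 kB pages) through *pages_to_grow.
 */
static bool demo_pools_can_cover(const struct demo_pool *pool,
				 size_t needed_4k, size_t *pages_to_grow)
{
	for (; pool && needed_4k; pool = pool->next_pool) {
		size_t pool_size_4k = pool->size << pool->order;

		needed_4k -= (pool_size_4k >= needed_4k) ?
				needed_4k : pool_size_4k;
	}

	*pages_to_grow = needed_4k;
	return needed_4k == 0;
}
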
- -page_fault_retry: - /* so we have a translation fault, let's see if it is for growable - * memory */ - kbase_gpu_vm_lock(kctx); - - region = kbase_region_tracker_find_region_enclosing_address(kctx, - fault->addr); - if (!region || region->flags & KBASE_REG_FREE) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not mapped on the GPU", fault); - goto fault_done; - } - - if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "DMA-BUF is not mapped on the GPU", fault); - goto fault_done; - } - - if ((region->flags & GROWABLE_FLAGS_REQUIRED) - != GROWABLE_FLAGS_REQUIRED) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory is not growable", fault); - goto fault_done; - } - - if ((region->flags & KBASE_REG_DONT_NEED)) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Don't need memory can't be grown", fault); - goto fault_done; - } - - /* find the size we need to grow it by */ - /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address - * validating the fault_adress to be within a size_t from the start_pfn */ - fault_rel_pfn = fault_pfn - region->start_pfn; - - if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { - dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", - fault->addr, region->start_pfn, - region->start_pfn + - kbase_reg_current_backed_size(region)); - - mutex_lock(&kbdev->mmu_hw_mutex); - - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - /* [1] in case another page fault occurred while we were - * handling the (duplicate) page fault we need to ensure we - * don't loose the other page fault as result of us clearing - * the MMU IRQ. Therefore, after we clear the MMU IRQ we send - * an UNLOCK command that will retry any stalled memory - * transaction (which should cause the other page fault to be - * raised again). 
- */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, - AS_COMMAND_UNLOCK, 1); - - mutex_unlock(&kbdev->mmu_hw_mutex); - - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - kbase_gpu_vm_unlock(kctx); - - goto fault_done; - } - - new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); - - /* cap to max vsize */ - new_pages = min(new_pages, region->nr_pages - kbase_reg_current_backed_size(region)); - - if (0 == new_pages) { - mutex_lock(&kbdev->mmu_hw_mutex); - - /* Duplicate of a fault we've already handled, nothing to do */ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - /* See comment [1] about UNLOCK usage */ - kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, - AS_COMMAND_UNLOCK, 1); - - mutex_unlock(&kbdev->mmu_hw_mutex); - - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - kbase_gpu_vm_unlock(kctx); - goto fault_done; - } - - pages_to_grow = 0; - - spin_lock(&kctx->mem_partials_lock); - grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, - &grow_2mb_pool, prealloc_sas); - spin_unlock(&kctx->mem_partials_lock); - - if (grown) { - u64 pfn_offset; - u32 op; - - /* alloc success */ - KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); - - /* set up the new pages */ - pfn_offset = kbase_reg_current_backed_size(region) - new_pages; - /* - * Note: - * Issuing an MMU operation will unlock the MMU and cause the - * translation to be replayed. If the page insertion fails then - * rather then trying to continue the context should be killed - * so the no_flush version of insert_pages is used which allows - * us to unlock the MMU as we see fit. - */ - err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags); - if (err) { - kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); - if (region->gpu_alloc != region->cpu_alloc) - kbase_free_phy_pages_helper(region->cpu_alloc, - new_pages); - kbase_gpu_vm_unlock(kctx); - /* The locked VA region will be unlocked and the cache invalidated in here */ - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Page table update failure", fault); - goto fault_done; - } -#if defined(CONFIG_MALI_GATOR_SUPPORT) - kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); -#endif - KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages); - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - /* flush L2 and unlock the VA (resumes the MMU) */ - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) - op = AS_COMMAND_FLUSH; - else - op = AS_COMMAND_FLUSH_PT; - - /* clear MMU interrupt - this needs to be done after updating - * the page tables but before issuing a FLUSH command. The - * FLUSH cmd has a side effect that it restarts stalled memory - * transactions in other address spaces which may cause - * another fault to occur. If we didn't clear the interrupt at - * this stage a new IRQ might not be raised when the GPU finds - * a MMU IRQ is already pending. 
- */ - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - - kbase_mmu_hw_do_operation(kbdev, faulting_as, - fault->addr >> PAGE_SHIFT, - new_pages, op, 1); - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - - /* reenable this in the mask */ - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_PAGE); - -#ifdef CONFIG_MALI_CINSTR_GWT - if (kctx->gwt_enabled) { - /* GWT also tracks growable regions. */ - struct kbasep_gwt_list_element *pos; - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (pos) { - pos->region = region; - pos->page_addr = (region->start_pfn + - pfn_offset) << - PAGE_SHIFT; - pos->num_pages = new_pages; - list_add(&pos->link, - &kctx->gwt_current_list); - } else { - dev_warn(kbdev->dev, "kmalloc failure"); - } - } -#endif - kbase_gpu_vm_unlock(kctx); - } else { - int ret = -ENOMEM; - - kbase_gpu_vm_unlock(kctx); - - /* If the memory pool was insufficient then grow it and retry. - * Otherwise fail the allocation. - */ - if (pages_to_grow > 0) { -#ifdef CONFIG_MALI_2MB_ALLOC - if (grow_2mb_pool) { - /* Round page requirement up to nearest 2 MB */ - pages_to_grow = (pages_to_grow + - ((1 << kctx->lp_mem_pool.order) - 1)) - >> kctx->lp_mem_pool.order; - ret = kbase_mem_pool_grow(&kctx->lp_mem_pool, - pages_to_grow); - } else { -#endif - ret = kbase_mem_pool_grow(&kctx->mem_pool, - pages_to_grow); -#ifdef CONFIG_MALI_2MB_ALLOC - } -#endif - } - if (ret < 0) { - /* failed to extend, handle as a normal PF */ - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Page allocation failure", fault); - } else { - goto page_fault_retry; - } - } - -fault_done: - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) - kfree(prealloc_sas[i]); - - /* - * By this point, the fault was handled in some way, - * so release the ctx refcount - */ - kbasep_js_runpool_release_ctx(kbdev, kctx); - - atomic_dec(&kbdev->faults_pending); -} - -static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut) -{ - u64 *page; - int i; - struct page *p; - - p = kbase_mem_pool_alloc(&kbdev->mem_pool); - if (!p) - return 0; - - page = kmap(p); - if (NULL == page) - goto alloc_free; - - /* If the MMU tables belong to a context then account the memory usage - * to that context, otherwise the MMU tables are device wide and are - * only accounted to the device. - */ - if (mmut->kctx) { - int new_page_count; - - new_page_count = kbase_atomic_add_pages(1, - &mmut->kctx->used_pages); - KBASE_TLSTREAM_AUX_PAGESALLOC( - mmut->kctx->id, - (u64)new_page_count); - kbase_process_page_usage_inc(mmut->kctx, 1); - } - - kbase_atomic_add_pages(1, &kbdev->memdev.used_pages); - - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) - kbdev->mmu_mode->entry_invalidate(&page[i]); - - kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); - - kunmap(p); - return page_to_phys(p); - -alloc_free: - kbase_mem_pool_free(&kbdev->mem_pool, p, false); - - return 0; -} - -/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the - * new table from the pool if needed and possible - */ -static int mmu_get_next_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - phys_addr_t *pgd, u64 vpfn, int level) -{ - u64 *page; - phys_addr_t target_pgd; - struct page *p; - - KBASE_DEBUG_ASSERT(*pgd); - - lockdep_assert_held(&mmut->mmu_lock); - - /* - * Architecture spec defines level-0 as being the top-most. - * This is a bit unfortunate here, but we keep the same convention. 
- */ - vpfn >>= (3 - level) * 9; - vpfn &= 0x1FF; - - p = pfn_to_page(PFN_DOWN(*pgd)); - page = kmap(p); - if (NULL == page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); - return -EINVAL; - } - - target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); - - if (!target_pgd) { - target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - if (!target_pgd) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", - __func__); - kunmap(p); - return -ENOMEM; - } - - kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); - - kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); - /* Rely on the caller to update the address space flags. */ - } - - kunmap(p); - *pgd = target_pgd; - - return 0; -} - -/* - * Returns the PGD for the specified level of translation - */ -static int mmu_get_pgd_at_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - unsigned int level, - phys_addr_t *out_pgd) -{ - phys_addr_t pgd; - int l; - - lockdep_assert_held(&mmut->mmu_lock); - pgd = mmut->pgd; - - for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); - /* Handle failure condition */ - if (err) { - dev_dbg(kbdev->dev, - "%s: mmu_get_next_pgd failure at level %d\n", - __func__, l); - return err; - } - } - - *out_pgd = pgd; - - return 0; -} - -static int mmu_get_bottom_pgd(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 vpfn, - phys_addr_t *out_pgd) -{ - return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, - out_pgd); -} - -static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - u64 from_vpfn, u64 to_vpfn) -{ - phys_addr_t pgd; - u64 vpfn = from_vpfn; - struct kbase_mmu_mode const *mmu_mode; - - /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); - - lockdep_assert_held(&mmut->mmu_lock); - - mmu_mode = kbdev->mmu_mode; - - while (vpfn < to_vpfn) { - unsigned int i; - unsigned int idx = vpfn & 0x1FF; - unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; - unsigned int pcount = 0; - unsigned int left = to_vpfn - vpfn; - unsigned int level; - u64 *page; - - if (count > left) - count = left; - - /* need to check if this is a 2MB page or a 4kB */ - pgd = mmut->pgd; - - for (level = MIDGARD_MMU_TOPLEVEL; - level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { - idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); - if (mmu_mode->ate_is_valid(page[idx], level)) - break; /* keep the mapping */ - kunmap(phys_to_page(pgd)); - pgd = mmu_mode->pte_to_phy_addr(page[idx]); - } - - switch (level) { - case MIDGARD_MMU_LEVEL(2): - /* remap to single entry to update */ - pcount = 1; - break; - case MIDGARD_MMU_BOTTOMLEVEL: - /* page count is the same as the logical count */ - pcount = count; - break; - default: - dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", - __func__, level); - goto next; - } - - /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[idx + i]); - - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, - 8 * pcount); - kunmap(phys_to_page(pgd)); - -next: - vpfn += count; - } -} - -/* - * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' - */ -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags) -{ - phys_addr_t pgd; - u64 *pgd_page; - /* In case the insert_single_page 
only partially completes we need to be - * able to recover */ - bool recover_required = false; - u64 recover_vpfn = vpfn; - size_t recover_count = 0; - size_t remain = nr; - int err; - struct kbase_mmu_mode const *mmu_mode; - - KBASE_DEBUG_ASSERT(NULL != kctx); - /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - - mmu_mode = kctx->kbdev->mmu_mode; - - /* Early out if there is nothing to do */ - if (nr == 0) - return 0; - - mutex_lock(&kctx->mmu.mmu_lock); - - while (remain) { - unsigned int i; - unsigned int index = vpfn & 0x1FF; - unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; - struct page *p; - - if (count > remain) - count = remain; - - /* - * Repeatedly calling mmu_get_bottom_pte() is clearly - * suboptimal. We don't have to re-parse the whole tree - * each time (just cache the l0-l2 sequence). - * On the other hand, it's only a gain when we map more than - * 256 pages at once (on average). Do we really care? - */ - do { - err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu, - vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu.mmu_lock); - } while (!err); - if (err) { - dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); - if (recover_required) { - /* Invalidate the pages we have partially - * completed */ - mmu_insert_pages_failure_recovery(kctx->kbdev, - &kctx->mmu, - recover_vpfn, - recover_vpfn + recover_count); - } - goto fail_unlock; - } - - p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); - if (!pgd_page) { - dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); - if (recover_required) { - /* Invalidate the pages we have partially - * completed */ - mmu_insert_pages_failure_recovery(kctx->kbdev, - &kctx->mmu, - recover_vpfn, - recover_vpfn + recover_count); - } - err = -ENOMEM; - goto fail_unlock; - } - - for (i = 0; i < count; i++) { - unsigned int ofs = index + i; - - /* Fail if the current page is a valid ATE entry */ - KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); - - mmu_mode->entry_set_ate(&pgd_page[ofs], - phys, flags, - MIDGARD_MMU_BOTTOMLEVEL); - } - - vpfn += count; - remain -= count; - - kbase_mmu_sync_pgd(kctx->kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), - count * sizeof(u64)); - - kunmap(p); - /* We have started modifying the page table. - * If further pages need inserting and fail we need to undo what - * has already taken place */ - recover_required = true; - recover_count += count; - } - mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); - return 0; - -fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); - return err; -} - -static inline void cleanup_empty_pte(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 *pte) -{ - phys_addr_t tmp_pgd; - struct page *tmp_p; - - tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte); - tmp_p = phys_to_page(tmp_pgd); - kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false); - - /* If the MMU tables belong to a context then we accounted the memory - * usage to that context, so decrement here. 
- */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - kbase_atomic_sub_pages(1, &mmut->kctx->used_pages); - } - kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages); -} - -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - const u64 start_vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags) -{ - phys_addr_t pgd; - u64 *pgd_page; - u64 insert_vpfn = start_vpfn; - size_t remain = nr; - int err; - struct kbase_mmu_mode const *mmu_mode; - - /* Note that 0 is a valid start_vpfn */ - /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); - - mmu_mode = kbdev->mmu_mode; - - /* Early out if there is nothing to do */ - if (nr == 0) - return 0; - - mutex_lock(&mmut->mmu_lock); - - while (remain) { - unsigned int i; - unsigned int vindex = insert_vpfn & 0x1FF; - unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; - struct page *p; - unsigned int cur_level; - - if (count > remain) - count = remain; - - if (!vindex && is_huge_head(*phys)) - cur_level = MIDGARD_MMU_LEVEL(2); - else - cur_level = MIDGARD_MMU_BOTTOMLEVEL; - - /* - * Repeatedly calling mmu_get_pgd_at_level() is clearly - * suboptimal. We don't have to re-parse the whole tree - * each time (just cache the l0-l2 sequence). - * On the other hand, it's only a gain when we map more than - * 256 pages at once (on average). Do we really care? - */ - do { - err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, - cur_level, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&mmut->mmu_lock); - err = kbase_mem_pool_grow(&kbdev->mem_pool, - cur_level); - mutex_lock(&mmut->mmu_lock); - } while (!err); - - if (err) { - dev_warn(kbdev->dev, - "%s: mmu_get_bottom_pgd failure\n", __func__); - if (insert_vpfn != start_vpfn) { - /* Invalidate the pages we have partially - * completed */ - mmu_insert_pages_failure_recovery(kbdev, - mmut, start_vpfn, insert_vpfn); - } - goto fail_unlock; - } - - p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); - if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", - __func__); - if (insert_vpfn != start_vpfn) { - /* Invalidate the pages we have partially - * completed */ - mmu_insert_pages_failure_recovery(kbdev, - mmut, start_vpfn, insert_vpfn); - } - err = -ENOMEM; - goto fail_unlock; - } - - if (cur_level == MIDGARD_MMU_LEVEL(2)) { - unsigned int level_index = (insert_vpfn >> 9) & 0x1FF; - u64 *target = &pgd_page[level_index]; - - if (mmu_mode->pte_is_valid(*target, cur_level)) - cleanup_empty_pte(kbdev, mmut, target); - mmu_mode->entry_set_ate(target, *phys, flags, - cur_level); - } else { - for (i = 0; i < count; i++) { - unsigned int ofs = vindex + i; - u64 *target = &pgd_page[ofs]; - - /* Warn if the current page is a valid ATE - * entry. The page table shouldn't have anything - * in the place where we are trying to put a - * new entry. 
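
/*
 * Illustrative sketch, not part of the removed kbase sources: in
 * kbase_mmu_insert_pages_no_flush() above a 2 MB huge page is mapped with a
 * single level-2 ATE, but only when the virtual PFN is 2 MB aligned (its low
 * nine index bits are zero); otherwise level-3 (4 kB) entries are used. The
 * demo_* names below are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_MMU_LEVEL_2 2	/* one entry spans 512 x 4 kB = 2 MB */
#define DEMO_MMU_LEVEL_3 3	/* bottom level, one entry per 4 kB page */

static int demo_pick_map_level(uint64_t vpfn, bool phys_is_huge_head)
{
	unsigned int vindex = vpfn & 0x1FF;	/* index in the level-3 table */

	return (!vindex && phys_is_huge_head) ? DEMO_MMU_LEVEL_2
					      : DEMO_MMU_LEVEL_3;
}
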
Modification to page table entries - * should be performed with - * kbase_mmu_update_pages() - */ - WARN_ON((*target & 1UL) != 0); - - kbdev->mmu_mode->entry_set_ate(target, - phys[i], flags, cur_level); - } - } - - phys += count; - insert_vpfn += count; - remain -= count; - - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(p) + (vindex * sizeof(u64)), - count * sizeof(u64)); - - kunmap(p); - } - - err = 0; - -fail_unlock: - mutex_unlock(&mmut->mmu_lock); - return err; -} - -/* - * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space - * number 'as_nr'. - */ -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr) -{ - int err; - - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, - phys, nr, flags); - - if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); - else - kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); - -/** - * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches - * without retaining the kbase context. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. - * - * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any - * other locking. - */ -static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) -{ - struct kbase_device *kbdev = kctx->kbdev; - int err; - u32 op; - - /* Early out if there is nothing to do */ - if (nr == 0) - return; - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; - - err = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - vpfn, nr, op, 0); - if (err) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu_locked(kbdev)) - kbase_reset_gpu_locked(kbdev); - } - -#ifndef CONFIG_MALI_NO_MALI - /* - * As this function could be called in interrupt context the sync - * request can't block. Instead log the request and the next flush - * request will pick it up. - */ - if ((!err) && sync && - kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) - atomic_set(&kctx->drain_pending, 1); -#endif /* !CONFIG_MALI_NO_MALI */ -} - -/* Perform a flush/invalidate on a particular address space - */ -static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, - struct kbase_as *as, - u64 vpfn, size_t nr, bool sync, bool drain_pending) -{ - int err; - u32 op; - - if (kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* GPU is off so there's no need to perform flush/invalidate */ - return; - } - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; - - err = kbase_mmu_hw_do_operation(kbdev, - as, vpfn, nr, op, 0); - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issueing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - } - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - -#ifndef CONFIG_MALI_NO_MALI - /* - * The transaction lock must be dropped before here - * as kbase_wait_write_flush could take it if - * the GPU was powered down (static analysis doesn't - * know this can't happen). - */ - drain_pending |= (!err) && sync && - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367); - if (drain_pending) { - /* Wait for GPU to flush write buffer */ - kbase_wait_write_flush(kbdev); - } -#endif /* !CONFIG_MALI_NO_MALI */ - - kbase_pm_context_idle(kbdev); -} - -static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, - u64 vpfn, size_t nr, bool sync, int as_nr) -{ - /* Skip if there is nothing to do */ - if (nr) { - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn, - nr, sync, false); - } -} - -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) -{ - struct kbase_device *kbdev; - bool ctx_is_in_runpool; - bool drain_pending = false; - -#ifndef CONFIG_MALI_NO_MALI - if (atomic_xchg(&kctx->drain_pending, 0)) - drain_pending = true; -#endif /* !CONFIG_MALI_NO_MALI */ - - /* Early out if there is nothing to do */ - if (nr == 0) - return; - - kbdev = kctx->kbdev; - mutex_lock(&kbdev->js_data.queue_mutex); - ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); - mutex_unlock(&kbdev->js_data.queue_mutex); - - if (ctx_is_in_runpool) { - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], - vpfn, nr, sync, drain_pending); - - kbasep_js_runpool_release_ctx(kbdev, kctx); - } -} - -void kbase_mmu_update(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - lockdep_assert_held(&kbdev->mmu_hw_mutex); - KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); - - kbdev->mmu_mode->update(kbdev, mmut, as_nr); -} -KBASE_EXPORT_TEST_API(kbase_mmu_update); - -void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - lockdep_assert_held(&kbdev->hwaccess_lock); - lockdep_assert_held(&kbdev->mmu_hw_mutex); - - kbdev->mmu_mode->disable_as(kbdev, as_nr); -} - -void kbase_mmu_disable(struct kbase_context *kctx) -{ - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the hwaccess_lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - - /* - * The address space is being disabled, drain all knowledge of it out - * from the caches as pages and page tables might be freed after this. - * - * The job scheduler code will already be holding the locks and context - * so just do the flush. - */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); - - kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); -} -KBASE_EXPORT_TEST_API(kbase_mmu_disable); - -/* - * We actually only discard the ATE, and not the page table - * pages. There is a potential DoS here, as we'll leak memory by - * having PTEs that are potentially unused. Will require physical - * page accounting, so MMU pages are part of the process allocation. - * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. 
- * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. - */ -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) -{ - phys_addr_t pgd; - size_t requested_nr = nr; - struct kbase_mmu_mode const *mmu_mode; - int err = -EFAULT; - - if (0 == nr) { - /* early out if nothing to do */ - return 0; - } - - mutex_lock(&mmut->mmu_lock); - - mmu_mode = kbdev->mmu_mode; - - while (nr) { - unsigned int i; - unsigned int index = vpfn & 0x1FF; - unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; - unsigned int pcount; - unsigned int level; - u64 *page; - - if (count > nr) - count = nr; - - /* need to check if this is a 2MB or a 4kB page */ - pgd = mmut->pgd; - - for (level = MIDGARD_MMU_TOPLEVEL; - level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { - phys_addr_t next_pgd; - - index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(phys_to_page(pgd)); - if (mmu_mode->ate_is_valid(page[index], level)) - break; /* keep the mapping */ - else if (!mmu_mode->pte_is_valid(page[index], level)) { - /* nothing here, advance */ - switch (level) { - case MIDGARD_MMU_LEVEL(0): - count = 134217728; - break; - case MIDGARD_MMU_LEVEL(1): - count = 262144; - break; - case MIDGARD_MMU_LEVEL(2): - count = 512; - break; - case MIDGARD_MMU_LEVEL(3): - count = 1; - break; - } - if (count > nr) - count = nr; - goto next; - } - next_pgd = mmu_mode->pte_to_phy_addr(page[index]); - kunmap(phys_to_page(pgd)); - pgd = next_pgd; - } - - switch (level) { - case MIDGARD_MMU_LEVEL(0): - case MIDGARD_MMU_LEVEL(1): - dev_warn(kbdev->dev, - "%s: No support for ATEs at level %d\n", - __func__, level); - kunmap(phys_to_page(pgd)); - goto out; - case MIDGARD_MMU_LEVEL(2): - /* can only teardown if count >= 512 */ - if (count >= 512) { - pcount = 1; - } else { - dev_warn(kbdev->dev, - "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", - __func__, count); - pcount = 0; - } - break; - case MIDGARD_MMU_BOTTOMLEVEL: - /* page count is the same as the logical count */ - pcount = count; - break; - default: - dev_err(kbdev->dev, - "%s: found non-mapped memory, early out\n", - __func__); - vpfn += count; - nr -= count; - continue; - } - - /* Invalidate the entries we added */ - for (i = 0; i < pcount; i++) - mmu_mode->entry_invalidate(&page[index + i]); - - kbase_mmu_sync_pgd(kbdev, - kbase_dma_addr(phys_to_page(pgd)) + - 8 * index, 8*pcount); - -next: - kunmap(phys_to_page(pgd)); - vpfn += count; - nr -= count; - } - err = 0; -out: - mutex_unlock(&mmut->mmu_lock); - - if (mmut->kctx) - kbase_mmu_flush_invalidate(mmut->kctx, vpfn, requested_nr, true); - else - kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, requested_nr, true, as_nr); - - return err; -} - -KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); - -/** - * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU - * - * This will update page table entries that already exist on the GPU based on - * the new flags that are passed. 
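
/*
 * Illustrative sketch, not part of the removed kbase sources: the per-level
 * skip counts used by kbase_mmu_teardown_pages() above (134217728, 262144,
 * 512, 1) are simply 512^(3 - level): with 512 entries per table, one entry
 * at level N covers that many 4 kB pages, and the same 9-bit stride produces
 * the "(vpfn >> ((3 - level) * 9)) & 0x1FF" index math used for the walk.
 * The demo_* names below are hypothetical.
 */
#include <stdint.h>

#define DEMO_MMU_PAGE_ENTRIES 512	/* entries per page table page */

/* 4 kB pages covered by a single entry at the given level (0..3). */
static uint64_t demo_pages_per_entry(unsigned int level)
{
	return 1ULL << ((3 - level) * 9);	/* 512^(3 - level) */
}

/* Index into the table at the given level for a virtual page frame number. */
static unsigned int demo_level_index(uint64_t vpfn, unsigned int level)
{
	return (vpfn >> ((3 - level) * 9)) & (DEMO_MMU_PAGE_ENTRIES - 1);
}
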
It is used as a response to the changes of - * the memory attributes - * - * The caller is responsible for validating the memory attributes - * - * @kctx: Kbase context - * @vpfn: Virtual PFN (Page Frame Number) of the first page to update - * @phys: Tagged physical addresses of the physical pages to replace the - * current mappings - * @nr: Number of pages to update - * @flags: Flags - */ -static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags) -{ - phys_addr_t pgd; - u64 *pgd_page; - struct kbase_mmu_mode const *mmu_mode; - int err; - - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - - /* Early out if there is nothing to do */ - if (nr == 0) - return 0; - - mutex_lock(&kctx->mmu.mmu_lock); - - mmu_mode = kctx->kbdev->mmu_mode; - - while (nr) { - unsigned int i; - unsigned int index = vpfn & 0x1FF; - size_t count = KBASE_MMU_PAGE_ENTRIES - index; - struct page *p; - - if (count > nr) - count = nr; - - do { - err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu, - vpfn, &pgd); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu.mmu_lock); - } while (!err); - if (err) { - dev_warn(kctx->kbdev->dev, - "mmu_get_bottom_pgd failure\n"); - goto fail_unlock; - } - - p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); - if (!pgd_page) { - dev_warn(kctx->kbdev->dev, "kmap failure\n"); - err = -ENOMEM; - goto fail_unlock; - } - - for (i = 0; i < count; i++) - mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i], - flags, MIDGARD_MMU_BOTTOMLEVEL); - - phys += count; - vpfn += count; - nr -= count; - - kbase_mmu_sync_pgd(kctx->kbdev, - kbase_dma_addr(p) + (index * sizeof(u64)), - count * sizeof(u64)); - - kunmap(pfn_to_page(PFN_DOWN(pgd))); - } - - mutex_unlock(&kctx->mmu.mmu_lock); - return 0; - -fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); - return err; -} - -int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags) -{ - int err; - - err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, true); - return err; -} - -static void mmu_teardown_level(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, phys_addr_t pgd, - int level, u64 *pgd_page_buffer) -{ - phys_addr_t target_pgd; - struct page *p; - u64 *pgd_page; - int i; - struct kbase_mmu_mode const *mmu_mode; - - lockdep_assert_held(&mmut->mmu_lock); - - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); - /* kmap_atomic should NEVER fail. 
*/ - KBASE_DEBUG_ASSERT(NULL != pgd_page); - /* Copy the page to our preallocated buffer so that we can minimize - * kmap_atomic usage */ - memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); - kunmap_atomic(pgd_page); - pgd_page = pgd_page_buffer; - - mmu_mode = kbdev->mmu_mode; - - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { - target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); - - if (target_pgd) { - if (mmu_mode->pte_is_valid(pgd_page[i], level)) { - mmu_teardown_level(kbdev, mmut, - target_pgd, - level + 1, - pgd_page_buffer + - (PAGE_SIZE / sizeof(u64))); - } - } - } - - p = pfn_to_page(PFN_DOWN(pgd)); - kbase_mem_pool_free(&kbdev->mem_pool, p, true); - kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. - */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - kbase_atomic_sub_pages(1, &mmut->kctx->used_pages); - } -} - -int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - struct kbase_context *kctx) -{ - mutex_init(&mmut->mmu_lock); - mmut->kctx = kctx; - - /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ - mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); - - if (mmut->mmu_teardown_pages == NULL) - return -ENOMEM; - - mmut->pgd = 0; - /* We allocate pages into the kbdev memory pool, then - * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to - * avoid allocations from the kernel happening with the lock held. - */ - while (!mmut->pgd) { - int err; - - err = kbase_mem_pool_grow(&kbdev->mem_pool, - MIDGARD_MMU_BOTTOMLEVEL); - if (err) { - kbase_mmu_term(kbdev, mmut); - return -ENOMEM; - } - - mutex_lock(&mmut->mmu_lock); - mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - mutex_unlock(&mmut->mmu_lock); - } - - return 0; -} - -void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) -{ - if (mmut->pgd) { - mutex_lock(&mmut->mmu_lock); - mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL, - mmut->mmu_teardown_pages); - mutex_unlock(&mmut->mmu_lock); - - if (mmut->kctx) - KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0); - } - - kfree(mmut->mmu_teardown_pages); - mutex_destroy(&mmut->mmu_lock); -} - -static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) -{ - phys_addr_t target_pgd; - u64 *pgd_page; - int i; - size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); - size_t dump_size; - struct kbase_mmu_mode const *mmu_mode; - - KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu.mmu_lock); - - mmu_mode = kctx->kbdev->mmu_mode; - - pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); - if (!pgd_page) { - dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n"); - return 0; - } - - if (*size_left >= size) { - /* A modified physical address that contains the page table level */ - u64 m_pgd = pgd | level; - - /* Put the modified physical address in the output buffer */ - memcpy(*buffer, &m_pgd, sizeof(m_pgd)); - *buffer += sizeof(m_pgd); - - /* Followed by the page table itself */ - memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); - *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; - - *size_left -= size; - } - - if (level < MIDGARD_MMU_BOTTOMLEVEL) { - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { - if (mmu_mode->pte_is_valid(pgd_page[i], level)) { - target_pgd = mmu_mode->pte_to_phy_addr( - pgd_page[i]); - - 
dump_size = kbasep_mmu_dump_level(kctx, - target_pgd, level + 1, - buffer, size_left); - if (!dump_size) { - kunmap(pfn_to_page(PFN_DOWN(pgd))); - return 0; - } - size += dump_size; - } - } - } - - kunmap(pfn_to_page(PFN_DOWN(pgd))); - - return size; -} - -void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) -{ - void *kaddr; - size_t size_left; - - KBASE_DEBUG_ASSERT(kctx); - - if (0 == nr_pages) { - /* can't dump in a 0 sized buffer, early out */ - return NULL; - } - - size_left = nr_pages * PAGE_SIZE; - - KBASE_DEBUG_ASSERT(0 != size_left); - kaddr = vmalloc_user(size_left); - - mutex_lock(&kctx->mmu.mmu_lock); - - if (kaddr) { - u64 end_marker = 0xFFULL; - char *buffer; - char *mmu_dump_buffer; - u64 config[3]; - size_t dump_size, size = 0; - - buffer = (char *)kaddr; - mmu_dump_buffer = buffer; - - if (kctx->api_version >= KBASE_API_VERSION(8, 4)) { - struct kbase_mmu_setup as_setup; - - kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, - &as_setup); - config[0] = as_setup.transtab; - config[1] = as_setup.memattr; - config[2] = as_setup.transcfg; - memcpy(buffer, &config, sizeof(config)); - mmu_dump_buffer += sizeof(config); - size_left -= sizeof(config); - size += sizeof(config); - } - - dump_size = kbasep_mmu_dump_level(kctx, - kctx->mmu.pgd, - MIDGARD_MMU_TOPLEVEL, - &mmu_dump_buffer, - &size_left); - - if (!dump_size) - goto fail_free; - - size += dump_size; - - /* Add on the size for the end marker */ - size += sizeof(u64); - - if (size > (nr_pages * PAGE_SIZE)) { - /* The buffer isn't big enough - free the memory and return failure */ - goto fail_free; - } - - /* Add the end marker */ - memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); - } - - mutex_unlock(&kctx->mmu.mmu_lock); - return kaddr; - -fail_free: - vfree(kaddr); - mutex_unlock(&kctx->mmu.mmu_lock); - return NULL; -} -KBASE_EXPORT_TEST_API(kbase_mmu_dump); - -void bus_fault_worker(struct work_struct *data) -{ - struct kbase_as *faulting_as; - int as_no; - struct kbase_context *kctx; - struct kbase_device *kbdev; - struct kbase_fault *fault; - bool reset_status = false; - - faulting_as = container_of(data, struct kbase_as, work_busfault); - fault = &faulting_as->bf_data; - - /* Ensure that any pending page fault worker has completed */ - flush_work(&faulting_as->work_pagefault); - - as_no = faulting_as->number; - - kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - - /* Grab the context that was already refcounted in kbase_mmu_interrupt(). - * Therefore, it cannot be scheduled out of this AS until we explicitly release it - */ - kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); - if (WARN_ON(!kctx)) { - atomic_dec(&kbdev->faults_pending); - return; - } - - if (unlikely(fault->protected_mode)) { - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Permission failure", fault); - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbasep_js_runpool_release_ctx(kbdev, kctx); - atomic_dec(&kbdev->faults_pending); - return; - - } - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { - /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. - * We start the reset before switching to UNMAPPED to ensure that unrelated jobs - * are evicted from the GPU before the switch. - */ - dev_err(kbdev->dev, "GPU bus error occurred. 
For this GPU version we now soft-reset as part of bus error recovery\n"); - reset_status = kbase_prepare_to_reset_gpu(kbdev); - } - /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ - if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - unsigned long flags; - - /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - - /* Set the MMU into unmapped mode */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - - kbase_mmu_hw_clear_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, faulting_as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - - kbase_pm_context_idle(kbdev); - } - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) - kbase_reset_gpu(kbdev); - - kbasep_js_runpool_release_ctx(kbdev, kctx); - - atomic_dec(&kbdev->faults_pending); -} - -const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) -{ - const char *e; - - switch (exception_code) { - /* Non-Fault Status code */ - case 0x00: - e = "NOT_STARTED/IDLE/OK"; - break; - case 0x01: - e = "DONE"; - break; - case 0x02: - e = "INTERRUPTED"; - break; - case 0x03: - e = "STOPPED"; - break; - case 0x04: - e = "TERMINATED"; - break; - case 0x08: - e = "ACTIVE"; - break; - /* Job exceptions */ - case 0x40: - e = "JOB_CONFIG_FAULT"; - break; - case 0x41: - e = "JOB_POWER_FAULT"; - break; - case 0x42: - e = "JOB_READ_FAULT"; - break; - case 0x43: - e = "JOB_WRITE_FAULT"; - break; - case 0x44: - e = "JOB_AFFINITY_FAULT"; - break; - case 0x48: - e = "JOB_BUS_FAULT"; - break; - case 0x50: - e = "INSTR_INVALID_PC"; - break; - case 0x51: - e = "INSTR_INVALID_ENC"; - break; - case 0x52: - e = "INSTR_TYPE_MISMATCH"; - break; - case 0x53: - e = "INSTR_OPERAND_FAULT"; - break; - case 0x54: - e = "INSTR_TLS_FAULT"; - break; - case 0x55: - e = "INSTR_BARRIER_FAULT"; - break; - case 0x56: - e = "INSTR_ALIGN_FAULT"; - break; - case 0x58: - e = "DATA_INVALID_FAULT"; - break; - case 0x59: - e = "TILE_RANGE_FAULT"; - break; - case 0x5A: - e = "ADDR_RANGE_FAULT"; - break; - case 0x60: - e = "OUT_OF_MEMORY"; - break; - /* GPU exceptions */ - case 0x80: - e = "DELAYED_BUS_FAULT"; - break; - case 0x88: - e = "SHAREABILITY_FAULT"; - break; - /* MMU exceptions */ - case 0xC0: - case 0xC1: - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - e = "TRANSLATION_FAULT"; - break; - case 0xC8: - e = "PERMISSION_FAULT"; - break; - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "PERMISSION_FAULT"; - else - e = "UNKNOWN"; - break; - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - e = "TRANSTAB_BUS_FAULT"; - break; - case 0xD8: - e = "ACCESS_FLAG"; - break; - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "ACCESS_FLAG"; - else - e = "UNKNOWN"; - break; - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "ADDRESS_SIZE_FAULT"; - else - e = "UNKNOWN"; 
- break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - e = "MEMORY_ATTRIBUTES_FAULT"; - else - e = "UNKNOWN"; - break; - default: - e = "UNKNOWN"; - break; - }; - - return e; -} - -static const char *access_type_name(struct kbase_device *kbdev, - u32 fault_status) -{ - switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { - case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - return "ATOMIC"; - else - return "UNKNOWN"; - case AS_FAULTSTATUS_ACCESS_TYPE_READ: - return "READ"; - case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: - return "WRITE"; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: - return "EXECUTE"; - default: - WARN_ON(1); - return NULL; - } -} - -/** - * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on. - */ -static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, - struct kbase_as *as, const char *reason_str, - struct kbase_fault *fault) -{ - unsigned long flags; - int exception_type; - int access_type; - int source_id; - int as_no; - struct kbase_device *kbdev; - struct kbasep_js_device_data *js_devdata; - - bool reset_status = false; - - as_no = as->number; - kbdev = kctx->kbdev; - js_devdata = &kbdev->js_data; - - /* ASSERT that the context won't leave the runpool */ - KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); - - /* decode the fault status */ - exception_type = fault->status & 0xFF; - access_type = (fault->status >> 8) & 0x3; - source_id = (fault->status >> 16); - - /* terminal fault, print info about the fault */ - dev_err(kbdev->dev, - "Unhandled Page fault in AS%d at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "decoded fault status: %s\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n" - "pid: %d\n", - as_no, fault->addr, - reason_str, - fault->status, - (fault->status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), - exception_type, kbase_exception_name(kbdev, exception_type), - access_type, access_type_name(kbdev, fault->status), - source_id, - kctx->pid); - - /* hardware counters dump fault handling */ - if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) { - if ((fault->addr >= kbdev->hwcnt.addr) && - (fault->addr < (kbdev->hwcnt.addr + - kbdev->hwcnt.addr_bytes))) - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; - } - - /* Stop the kctx from submitting more jobs and cause it to be scheduled - * out/rescheduled - this will occur on releasing the context's refcount */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbasep_js_clear_submit_allowed(js_devdata, kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this - * context can appear in the job slots from this point on */ - kbase_backend_jm_kill_jobs_from_kctx(kctx); - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { - /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. - * We start the reset before switching to UNMAPPED to ensure that unrelated jobs - * are evicted from the GPU before the switch. - */ - dev_err(kbdev->dev, "Unhandled page fault. 
For this GPU version we now soft-reset the GPU as part of page fault recovery."); - reset_status = kbase_prepare_to_reset_gpu(kbdev); - } - /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - /* Clear down the fault */ - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) - kbase_reset_gpu(kbdev); -} - -void kbasep_as_do_poke(struct work_struct *work) -{ - struct kbase_as *as; - struct kbase_device *kbdev; - unsigned long flags; - - KBASE_DEBUG_ASSERT(work); - as = container_of(work, struct kbase_as, poke_work); - kbdev = container_of(as, struct kbase_device, as[as->number]); - KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); - - /* GPU power will already be active by virtue of the caller holding a JS - * reference on the address space, and will not release it until this worker - * has finished */ - - /* Further to the comment above, we know that while this function is running - * the AS will not be released as before the atom is released this workqueue - * is flushed (in kbase_as_poking_timer_release_atom) - */ - - /* AS transaction begin */ - mutex_lock(&kbdev->mmu_hw_mutex); - /* Force a uTLB invalidate */ - kbase_mmu_hw_do_operation(kbdev, as, 0, 0, - AS_COMMAND_UNLOCK, 0); - mutex_unlock(&kbdev->mmu_hw_mutex); - /* AS transaction end */ - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (as->poke_refcount && - !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { - /* Only queue up the timer if we need it, and we're not trying to kill it */ - hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); -} - -enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) -{ - struct kbase_as *as; - int queue_work_ret; - - KBASE_DEBUG_ASSERT(NULL != timer); - as = container_of(timer, struct kbase_as, poke_timer); - KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); - - queue_work_ret = queue_work(as->poke_wq, &as->poke_work); - KBASE_DEBUG_ASSERT(queue_work_ret); - return HRTIMER_NORESTART; -} - -/** - * Retain the poking timer on an atom's context (if the atom hasn't already - * done so), and start the timer (if it's not already started). - * - * This must only be called on a context that's scheduled in, and an atom - * that's running on the GPU. 
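A minimal usage sketch of the poking-timer retain/release pair defined in this file (illustrative only; it assumes the kbase declarations above and below, and example_track_running_atom is a hypothetical caller that already holds valid kbdev/kctx/katom pointers):

	/* Sketch: retain while the atom runs, release once it is done. */
	static void example_track_running_atom(struct kbase_device *kbdev,
					       struct kbase_context *kctx,
					       struct kbase_jd_atom *katom)
	{
		unsigned long flags;

		/* Retain is atomic-safe but requires hwaccess_lock. */
		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
		kbase_as_poking_timer_retain_atom(kbdev, kctx, katom);
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

		/* ... the atom executes on the GPU ... */

		/* Release may sleep (it cancels the poke hrtimer and flushes
		 * the poke workqueue), so only call it from non-atomic
		 * context. */
		kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
	}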
- * - * The caller must hold hwaccess_lock - * - * This can be called safely from atomic context - */ -void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - struct kbase_as *as; - - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(katom); - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (katom->poking) - return; - - katom->poking = 1; - - /* It's safe to work on the as/as_nr without an explicit reference, - * because the caller holds the hwaccess_lock, and the atom itself - * was also running and had already taken a reference */ - as = &kbdev->as[kctx->as_nr]; - - if (++(as->poke_refcount) == 1) { - /* First refcount for poke needed: check if not already in flight */ - if (!as->poke_state) { - /* need to start poking */ - as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT; - queue_work(as->poke_wq, &as->poke_work); - } - } -} - -/** - * If an atom holds a poking timer, release it and wait for it to finish - * - * This must only be called on a context that's scheduled in, and an atom - * that still has a JS reference on the context - * - * This must \b not be called from atomic context, since it can sleep. - */ -void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) -{ - struct kbase_as *as; - unsigned long flags; - - KBASE_DEBUG_ASSERT(kbdev); - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(katom); - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - if (!katom->poking) - return; - - as = &kbdev->as[kctx->as_nr]; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - KBASE_DEBUG_ASSERT(as->poke_refcount > 0); - KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); - - if (--(as->poke_refcount) == 0) { - as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - hrtimer_cancel(&as->poke_timer); - flush_workqueue(as->poke_wq); - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - /* Re-check whether it's still needed */ - if (as->poke_refcount) { - int queue_work_ret; - /* Poking still needed: - * - Another retain will not be starting the timer or queueing work, - * because it's still marked as in-flight - * - The hrtimer has finished, and has not started a new timer or - * queued work because it's been marked as killing - * - * So whatever happens now, just queue the work again */ - as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE); - queue_work_ret = queue_work(as->poke_wq, &as->poke_work); - KBASE_DEBUG_ASSERT(queue_work_ret); - } else { - /* It isn't - so mark it as not in flight, and not killing */ - as->poke_state = 0u; - - /* The poke associated with the atom has now finished. If this is - * also the last atom on the context, then we can guarentee no more - * pokes (and thus no more poking register accesses) will occur on - * the context until new atoms are run */ - } - } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - katom->poking = 0; -} - -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - if (!kctx) { - dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! 
Spurious IRQ or SW Design Error?\n", - kbase_as_has_bus_fault(as) ? - "Bus error" : "Page fault", - as->number, fault->addr); - - /* Since no ctx was found, the MMU must be disabled. */ - WARN_ON(as->current_setup.transtab); - - if (kbase_as_has_bus_fault(as)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); - } else if (kbase_as_has_page_fault(as)) { - kbase_mmu_hw_clear_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - kbase_mmu_hw_enable_fault(kbdev, as, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); - } - - if (kbase_as_has_bus_fault(as) && - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { - bool reset_status; - /* - * Reset the GPU, like in bus_fault_worker, in case an - * earlier error hasn't been properly cleared by this - * point. - */ - dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n"); - reset_status = kbase_prepare_to_reset_gpu_locked(kbdev); - if (reset_status) - kbase_reset_gpu_locked(kbdev); - } - - return; - } - - if (kbase_as_has_bus_fault(as)) { - /* - * hw counters dumping in progress, signal the - * other thread that it failed - */ - if ((kbdev->hwcnt.kctx == kctx) && - (kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_DUMPING)) - kbdev->hwcnt.backend.state = - KBASE_INSTR_STATE_FAULT; - - /* - * Stop the kctx from submitting more jobs and cause it - * to be scheduled out/rescheduled when all references - * to it are released - */ - kbasep_js_clear_submit_allowed(js_devdata, kctx); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, fault->addr, - fault->extra_addr); - else - dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, fault->addr); - - /* - * We need to switch to UNMAPPED mode - but we do this in a - * worker so that we can sleep - */ - WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); - atomic_inc(&kbdev->faults_pending); - } else { - WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); - atomic_inc(&kbdev->faults_pending); - } -} - -void kbase_flush_mmu_wqs(struct kbase_device *kbdev) -{ - int i; - - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - struct kbase_as *as = &kbdev->as[i]; - - flush_workqueue(as->pf_wq); - } -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_hw.h deleted file mode 100755 index 70d5f2becc71..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_hw.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file - * Interface file for accessing MMU hardware functionality - */ - -/** - * @page mali_kbase_mmu_hw_page MMU hardware interface - * - * @section mali_kbase_mmu_hw_intro_sec Introduction - * This module provides an abstraction for accessing the functionality provided - * by the midgard MMU and thus allows all MMU HW access to be contained within - * one common place and allows for different backends (implementations) to - * be provided. - */ - -#ifndef _MALI_KBASE_MMU_HW_H_ -#define _MALI_KBASE_MMU_HW_H_ - -/* Forward declarations */ -struct kbase_device; -struct kbase_as; -struct kbase_context; - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup mali_kbase_mmu_hw MMU access APIs - * @{ - */ - -/** @brief MMU fault type descriptor. - */ -enum kbase_mmu_fault_type { - KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, - KBASE_MMU_FAULT_TYPE_PAGE, - KBASE_MMU_FAULT_TYPE_BUS, - KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, - KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED -}; - -/** @brief Configure an address space for use. - * - * Configure the MMU using the address space details setup in the - * @ref kbase_context structure. - * - * @param[in] kbdev kbase device to configure. - * @param[in] as address space to configure. - */ -void kbase_mmu_hw_configure(struct kbase_device *kbdev, - struct kbase_as *as); - -/** @brief Issue an operation to the MMU. - * - * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that - * is associated with the provided @ref kbase_context over the specified range - * - * @param[in] kbdev kbase device to issue the MMU operation on. - * @param[in] as address space to issue the MMU operation on. - * @param[in] vpfn MMU Virtual Page Frame Number to start the - * operation on. - * @param[in] nr Number of pages to work on. - * @param[in] type Operation type (written to ASn_COMMAND). - * @param[in] handling_irq Is this operation being called during the handling - * of an interrupt? - * - * @return Zero if the operation was successful, non-zero otherwise. - */ -int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, - u64 vpfn, u32 nr, u32 type, - unsigned int handling_irq); - -/** @brief Clear a fault that has been previously reported by the MMU. - * - * Clear a bus error or page fault that has been reported by the MMU. - * - * @param[in] kbdev kbase device to clear the fault from. - * @param[in] as address space to clear the fault from. - * @param[in] type The type of fault that needs to be cleared. - */ -void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type); - -/** @brief Enable fault that has been previously reported by the MMU. - * - * After a page fault or bus error has been reported by the MMU these - * will be disabled. After these are handled this function needs to be - * called to enable the page fault or bus error fault again. - * - * @param[in] kbdev kbase device to again enable the fault from. - * @param[in] as address space to again enable the fault from. - * @param[in] type The type of fault that needs to be enabled again. 
- */ -void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, - enum kbase_mmu_fault_type type); - -/** @} *//* end group mali_kbase_mmu_hw */ -/** @} *//* end group base_kbase_api */ - -#endif /* _MALI_KBASE_MMU_HW_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_mode_aarch64.c deleted file mode 100755 index 38ca456477cc..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_mode_aarch64.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#include "mali_kbase.h" -#include "mali_midg_regmap.h" -#include "mali_kbase_defs.h" - -#define ENTRY_TYPE_MASK 3ULL -/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). - * Valid ATE entries at level 3 are flagged with the value 3. - * Valid ATE entries at level 0-2 are flagged with the value 1. - */ -#define ENTRY_IS_ATE_L3 3ULL -#define ENTRY_IS_ATE_L02 1ULL -#define ENTRY_IS_INVAL 2ULL -#define ENTRY_IS_PTE 3ULL - -#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ -#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ -#define ENTRY_ACCESS_RO (3ULL << 6) -#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ -#define ENTRY_ACCESS_BIT (1ULL << 10) -#define ENTRY_NX_BIT (1ULL << 54) - -/* Helper Function to perform assignment of page table entries, to - * ensure the use of strd, which is required on LPAE systems. - */ -static inline void page_table_entry_set(u64 *pte, u64 phy) -{ -#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE - WRITE_ONCE(*pte, phy); -#else -#ifdef CONFIG_64BIT - barrier(); - *pte = phy; - barrier(); -#elif defined(CONFIG_ARM) - barrier(); - asm volatile("ldrd r0, [%1]\n\t" - "strd r0, %0\n\t" - : "=m" (*pte) - : "r" (&phy) - : "r0", "r1"); - barrier(); -#else -#error "64-bit atomic write must be implemented for your architecture" -#endif -#endif -} - -static void mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) -{ - /* Set up the required caching policies at the correct indices - * in the memattr register. 
- */ - setup->memattr = - (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_AARCH64_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - (AS_MEMATTR_AARCH64_NON_CACHEABLE << - (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); - - setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; - setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; -} - -static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - int as_nr) -{ - struct kbase_as *as; - struct kbase_mmu_setup *current_setup; - - if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) - return; - - as = &kbdev->as[as_nr]; - current_setup = &as->current_setup; - - mmu_get_as_setup(mmut, current_setup); - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - struct kbase_as * const as = &kbdev->as[as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; - - current_setup->transtab = 0ULL; - current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static phys_addr_t pte_to_phy_addr(u64 entry) -{ - if (!(entry & 1)) - return 0; - - return entry & ~0xFFF; -} - -static int ate_is_valid(u64 ate, unsigned int level) -{ - if (level == MIDGARD_MMU_BOTTOMLEVEL) - return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3); - else - return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02); -} - -static int pte_is_valid(u64 pte, unsigned int level) -{ - /* PTEs cannot exist at the bottom level */ - if (level == MIDGARD_MMU_BOTTOMLEVEL) - return false; - return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); -} - -/* - * Map KBASE_REG flags to MMU flags - */ -static u64 get_mmu_flags(unsigned long flags) -{ - u64 mmu_flags; - - /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ - mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; - - /* Set access flags - note that AArch64 stage 1 does not support - * write-only access, so we use read/write instead - */ - if (flags & KBASE_REG_GPU_WR) - mmu_flags |= ENTRY_ACCESS_RW; - else if (flags & KBASE_REG_GPU_RD) - mmu_flags |= ENTRY_ACCESS_RO; - - /* nx if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; - - if (flags & KBASE_REG_SHARE_BOTH) { - /* inner and outer shareable */ - mmu_flags |= SHARE_BOTH_BITS; - } else if (flags & KBASE_REG_SHARE_IN) { - /* inner shareable coherency */ - mmu_flags |= SHARE_INNER_BITS; - } - - return mmu_flags; -} - -static void entry_set_ate(u64 *entry, - struct tagged_addr phy, - unsigned long flags, - unsigned int level) -{ - if (level == MIDGARD_MMU_BOTTOMLEVEL) - page_table_entry_set(entry, as_phys_addr_t(phy) | - get_mmu_flags(flags) | - ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3); - else - page_table_entry_set(entry, as_phys_addr_t(phy) | - get_mmu_flags(flags) | - ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); -} - -static void entry_set_pte(u64 *entry, phys_addr_t phy) -{ - page_table_entry_set(entry, (phy & PAGE_MASK) | - ENTRY_ACCESS_BIT | ENTRY_IS_PTE); -} - -static void entry_invalidate(u64 *entry) -{ - page_table_entry_set(entry, ENTRY_IS_INVAL); -} - -static struct kbase_mmu_mode const aarch64_mode = { - .update = mmu_update, - .get_as_setup = mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE -}; - -struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) -{ - return &aarch64_mode; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_mode_lpae.c deleted file mode 100755 index f6bdf91dc225..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_mmu_mode_lpae.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#include "mali_kbase.h" -#include "mali_midg_regmap.h" -#include "mali_kbase_defs.h" - -#define ENTRY_TYPE_MASK 3ULL -#define ENTRY_IS_ATE 1ULL -#define ENTRY_IS_INVAL 2ULL -#define ENTRY_IS_PTE 3ULL - -#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ -#define ENTRY_RD_BIT (1ULL << 6) -#define ENTRY_WR_BIT (1ULL << 7) -#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ -#define ENTRY_ACCESS_BIT (1ULL << 10) -#define ENTRY_NX_BIT (1ULL << 54) - -#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ - ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) - -/* Helper Function to perform assignment of page table entries, to - * ensure the use of strd, which is required on LPAE systems. 
- */ -static inline void page_table_entry_set(u64 *pte, u64 phy) -{ -#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE - WRITE_ONCE(*pte, phy); -#else -#ifdef CONFIG_64BIT - barrier(); - *pte = phy; - barrier(); -#elif defined(CONFIG_ARM) - barrier(); - asm volatile("ldrd r0, [%1]\n\t" - "strd r0, %0\n\t" - : "=m" (*pte) - : "r" (&phy) - : "r0", "r1"); - barrier(); -#else -#error "64-bit atomic write must be implemented for your architecture" -#endif -#endif -} - -static void mmu_get_as_setup(struct kbase_mmu_table *mmut, - struct kbase_mmu_setup * const setup) -{ - /* Set up the required caching policies at the correct indices - * in the memattr register. */ - setup->memattr = - (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << - (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | - (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | - (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_LPAE_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | - 0; /* The other indices are unused for now */ - - setup->transtab = ((u64)mmut->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | - AS_TRANSTAB_LPAE_ADRMODE_TABLE | - AS_TRANSTAB_LPAE_READ_INNER; - - setup->transcfg = 0; -} - -static void mmu_update(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - int as_nr) -{ - struct kbase_as *as; - struct kbase_mmu_setup *current_setup; - - if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) - return; - - as = &kbdev->as[as_nr]; - current_setup = &as->current_setup; - - mmu_get_as_setup(mmut, current_setup); - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - struct kbase_as * const as = &kbdev->as[as_nr]; - struct kbase_mmu_setup * const current_setup = &as->current_setup; - - current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; - - /* Apply the address space setting */ - kbase_mmu_hw_configure(kbdev, as); -} - -static phys_addr_t pte_to_phy_addr(u64 entry) -{ - if (!(entry & 1)) - return 0; - - return entry & ~0xFFF; -} - -static int ate_is_valid(u64 ate, unsigned int level) -{ - return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); -} - -static int pte_is_valid(u64 pte, unsigned int level) -{ - return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); -} - -/* - * Map KBASE_REG flags to MMU flags - */ -static u64 get_mmu_flags(unsigned long flags) -{ - u64 mmu_flags; - unsigned long memattr_idx; - - memattr_idx = KBASE_REG_MEMATTR_VALUE(flags); - if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE, - "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n")) - memattr_idx = AS_MEMATTR_INDEX_DEFAULT; - /* store mem_attr index as 4:2, noting that: - * - macro called above ensures 3 bits already - * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits - */ - mmu_flags = memattr_idx << 2; - - /* write perm if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; - /* read perm if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; - /* nx if requested */ - mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; - - if (flags & KBASE_REG_SHARE_BOTH) { - /* inner and outer shareable */ - mmu_flags |= SHARE_BOTH_BITS; - } else if (flags & KBASE_REG_SHARE_IN) { - /* inner shareable coherency */ - mmu_flags |= SHARE_INNER_BITS; - } - - return mmu_flags; -} - -static void entry_set_ate(u64 *entry, - struct tagged_addr phy, - unsigned long flags, - unsigned int level) -{ - page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | - ENTRY_IS_ATE); -} - -static void entry_set_pte(u64 *entry, phys_addr_t phy) -{ - page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); -} - -static void entry_invalidate(u64 *entry) -{ - page_table_entry_set(entry, ENTRY_IS_INVAL); -} - -static struct kbase_mmu_mode const lpae_mode = { - .update = mmu_update, - .get_as_setup = mmu_get_as_setup, - .disable_as = mmu_disable_as, - .pte_to_phy_addr = pte_to_phy_addr, - .ate_is_valid = ate_is_valid, - .pte_is_valid = pte_is_valid, - .entry_set_ate = entry_set_ate, - .entry_set_pte = entry_set_pte, - .entry_invalidate = entry_invalidate, - .flags = 0 -}; - -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) -{ - return &lpae_mode; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_platform_fake.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_platform_fake.c deleted file mode 100755 index fbb090e6c21f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_platform_fake.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include -#include - - -/* - * This file is included only for type definitions and functions belonging to - * specific platform folders. Do not add dependencies with symbols that are - * defined somewhere else. - */ -#include - -#define PLATFORM_CONFIG_RESOURCE_COUNT 4 -#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 - -static struct platform_device *mali_device; - -#ifndef CONFIG_OF -/** - * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources - * - * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function - * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. - * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
- * - * @param[in] io_resource Input IO resource data - * @param[out] linux_resources Pointer to output array of Linux resource structures - */ -static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) -{ - if (!io_resources || !linux_resources) { - pr_err("%s: couldn't find proper resources\n", __func__); - return; - } - - memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); - - linux_resources[0].start = io_resources->io_memory_region.start; - linux_resources[0].end = io_resources->io_memory_region.end; - linux_resources[0].flags = IORESOURCE_MEM; - - linux_resources[1].start = io_resources->job_irq_number; - linux_resources[1].end = io_resources->job_irq_number; - linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; - - linux_resources[2].start = io_resources->mmu_irq_number; - linux_resources[2].end = io_resources->mmu_irq_number; - linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; - - linux_resources[3].start = io_resources->gpu_irq_number; - linux_resources[3].end = io_resources->gpu_irq_number; - linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; -} -#endif /* CONFIG_OF */ - -int kbase_platform_register(void) -{ - struct kbase_platform_config *config; -#ifndef CONFIG_OF - struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; -#endif - int err; - - config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ - if (config == NULL) { - pr_err("%s: couldn't get platform config\n", __func__); - return -ENODEV; - } - - mali_device = platform_device_alloc("mali", 0); - if (mali_device == NULL) - return -ENOMEM; - -#ifndef CONFIG_OF - kbasep_config_parse_io_resources(config->io_resources, resources); - err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); - if (err) { - platform_device_put(mali_device); - mali_device = NULL; - return err; - } -#endif /* CONFIG_OF */ - - err = platform_device_add(mali_device); - if (err) { - platform_device_unregister(mali_device); - mali_device = NULL; - return err; - } - - return 0; -} -EXPORT_SYMBOL(kbase_platform_register); - -void kbase_platform_unregister(void) -{ - if (mali_device) - platform_device_unregister(mali_device); -} -EXPORT_SYMBOL(kbase_platform_unregister); diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_pm.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_pm.c deleted file mode 100755 index 5699eb8feaf2..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_pm.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_pm.c - * Base kernel power management APIs - */ - -#include -#include -#include -#include - -#include - -int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) -{ - return kbase_hwaccess_pm_powerup(kbdev, flags); -} - -void kbase_pm_halt(struct kbase_device *kbdev) -{ - kbase_hwaccess_pm_halt(kbdev); -} - -void kbase_pm_context_active(struct kbase_device *kbdev) -{ - (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); -} - -int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int c; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - if (kbase_pm_is_suspending(kbdev)) { - switch (suspend_handler) { - case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: - if (kbdev->pm.active_count != 0) - break; - /* FALLTHROUGH */ - case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - return 1; - - case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: - /* FALLTHROUGH */ - default: - KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); - break; - } - } - c = ++kbdev->pm.active_count; - KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); - - if (c == 1) { - /* First context active: Power on the GPU and any cores requested by - * the policy */ - kbase_hwaccess_pm_gpu_active(kbdev); - } - - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - - return 0; -} - -KBASE_EXPORT_TEST_API(kbase_pm_context_active); - -void kbase_pm_context_idle(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - int c; - - KBASE_DEBUG_ASSERT(kbdev != NULL); - - - mutex_lock(&js_devdata->runpool_mutex); - mutex_lock(&kbdev->pm.lock); - - c = --kbdev->pm.active_count; - KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); - - KBASE_DEBUG_ASSERT(c >= 0); - - if (c == 0) { - /* Last context has gone idle */ - kbase_hwaccess_pm_gpu_idle(kbdev); - - /* Wake up anyone waiting for this to become 0 (e.g. suspend). The - * waiters must synchronize with us by locking the pm.lock after - * waiting. - */ - wake_up(&kbdev->pm.zero_active_count_wait); - } - - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); -} - -KBASE_EXPORT_TEST_API(kbase_pm_context_idle); - -void kbase_pm_suspend(struct kbase_device *kbdev) -{ - KBASE_DEBUG_ASSERT(kbdev); - - /* Suspend vinstr. This blocks until the vinstr worker and timer are - * no longer running. - */ - kbase_vinstr_suspend(kbdev->vinstr_ctx); - - /* Disable GPU hardware counters. - * This call will block until counters are disabled. - */ - kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - - mutex_lock(&kbdev->pm.lock); - KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); - kbdev->pm.suspending = true; - mutex_unlock(&kbdev->pm.lock); - - /* From now on, the active count will drop towards zero. Sometimes, it'll - * go up briefly before going down again. However, once it reaches zero it - * will stay there - guaranteeing that we've idled all pm references */ - - /* Suspend job scheduler and associated components, so that it releases all - * the PM active count references */ - kbasep_js_suspend(kbdev); - - /* Wait for the active count to reach zero. 
This is not the same as - * waiting for a power down, since not all policies power down when this - * reaches zero. */ - wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); - - /* NOTE: We synchronize with anything that was just finishing a - * kbase_pm_context_idle() call by locking the pm.lock below */ - - kbase_hwaccess_pm_suspend(kbdev); -} - -void kbase_pm_resume(struct kbase_device *kbdev) -{ - unsigned long flags; - - /* MUST happen before any pm_context_active calls occur */ - kbase_hwaccess_pm_resume(kbdev); - - /* Initial active call, to power on the GPU/cores if needed */ - kbase_pm_context_active(kbdev); - - /* Resume any blocked atoms (which may cause contexts to be scheduled in - * and dependent atoms to run) */ - kbase_resume_suspended_soft_jobs(kbdev); - - /* Resume the Job Scheduler and associated components, and start running - * atoms */ - kbasep_js_resume(kbdev); - - /* Matching idle call, to power off the GPU/cores if we didn't actually - * need it and the policy doesn't want it on */ - kbase_pm_context_idle(kbdev); - - /* Re-enable GPU hardware counters */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - /* Resume vinstr */ - kbase_vinstr_resume(kbdev->vinstr_ctx); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_pm.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_pm.h deleted file mode 100755 index 59a031467c95..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_pm.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_kbase_pm.h - * Power management API definitions - */ - -#ifndef _KBASE_PM_H_ -#define _KBASE_PM_H_ - -#include "mali_kbase_hwaccess_pm.h" - -#define PM_ENABLE_IRQS 0x01 -#define PM_HW_ISSUES_DETECT 0x02 - - -/** Initialize the power management framework. - * - * Must be called before any other power management function - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - * - * @return 0 if the power management framework was successfully initialized. - */ -int kbase_pm_init(struct kbase_device *kbdev); - -/** Power up GPU after all modules have been initialized and interrupt handlers installed. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - * - * @param flags Flags to pass on to kbase_pm_init_hw - * - * @return 0 if powerup was successful. - */ -int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); - -/** - * Halt the power management framework. - * Should ensure that no new interrupts are generated, - * but allow any currently running interrupt handlers to complete successfully. 
- * The GPU is forced off by the time this function returns, regardless of - * whether or not the active power policy asks for the GPU to be powered off. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_halt(struct kbase_device *kbdev); - -/** Terminate the power management framework. - * - * No power management functions may be called after this - * (except @ref kbase_pm_init) - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_term(struct kbase_device *kbdev); - -/** Increment the count of active contexts. - * - * This function should be called when a context is about to submit a job. It informs the active power policy that the - * GPU is going to be in use shortly and the policy is expected to start turning on the GPU. - * - * This function will block until the GPU is available. - * - * This function ASSERTS if a suspend is occuring/has occurred whilst this is - * in use. Use kbase_pm_contect_active_unless_suspending() instead. - * - * @note a Suspend is only visible to Kernel threads; user-space threads in a - * syscall cannot witness a suspend, because they are frozen before the suspend - * begins. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_context_active(struct kbase_device *kbdev); - - -/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ -enum kbase_pm_suspend_handler { - /** A suspend is not expected/not possible - this is the same as - * kbase_pm_context_active() */ - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, - /** If we're suspending, fail and don't increase the active count */ - KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, - /** If we're suspending, succeed and allow the active count to increase iff - * it didn't go from 0->1 (i.e., we didn't re-activate the GPU). - * - * This should only be used when there is a bounded time on the activation - * (e.g. guarantee it's going to be idled very soon after) */ - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE -}; - -/** Suspend 'safe' variant of kbase_pm_context_active() - * - * If a suspend is in progress, this allows for various different ways of - * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. - * - * We returns a status code indicating whether we're allowed to keep the GPU - * active during the suspend, depending on the handler code. If the status code - * indicates a failure, the caller must abort whatever operation it was - * attempting, and potentially queue it up for after the OS has resumed. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - * @param suspend_handler The handler code for how to handle a suspend that might occur - * @return zero Indicates success - * @return non-zero Indicates failure due to the system being suspending/suspended. - */ -int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); - -/** Decrement the reference count of active contexts. - * - * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power - * policy so the calling code should ensure that it does not access the GPU's registers. 
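A minimal sketch of how the suspend-aware activation documented above is meant to be used (illustrative only; example_do_gpu_work is a hypothetical caller and the error code choice is arbitrary):

	/* Sketch: take a PM reference without re-powering a suspending GPU. */
	static int example_do_gpu_work(struct kbase_device *kbdev)
	{
		if (kbase_pm_context_active_handle_suspend(kbdev,
				KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE))
			return -EAGAIN;	/* suspending/suspended: retry after resume */

		/* The GPU is powered: register access and job submission are
		 * safe here. */

		/* Drop the reference; the policy may now power the GPU down. */
		kbase_pm_context_idle(kbdev);
		return 0;
	}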
- * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_context_idle(struct kbase_device *kbdev); - -/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline - * function - */ - -/** - * Suspend the GPU and prevent any further register accesses to it from Kernel - * threads. - * - * This is called in response to an OS suspend event, and calls into the various - * kbase components to complete the suspend. - * - * @note the mechanisms used here rely on all user-space threads being frozen - * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up - * the GPU e.g. via atom submission. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_suspend(struct kbase_device *kbdev); - -/** - * Resume the GPU, allow register accesses to it, and resume running atoms on - * the GPU. - * - * This is called in response to an OS resume event, and calls into the various - * kbase components to complete the resume. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) - */ -void kbase_pm_resume(struct kbase_device *kbdev); - -/** - * kbase_pm_vsync_callback - vsync callback - * - * @buffer_updated: 1 if a new frame was displayed, 0 otherwise - * @data: Pointer to the kbase device as returned by kbase_find_device() - * - * Callback function used to notify the power management code that a vsync has - * occurred on the display. - */ -void kbase_pm_vsync_callback(int buffer_updated, void *data); - -#endif /* _KBASE_PM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_regs_history_debugfs.c deleted file mode 100755 index 763740ec0ae7..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_regs_history_debugfs.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase.h" - -#include "mali_kbase_regs_history_debugfs.h" - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -#include - - -static int regs_history_size_get(void *data, u64 *val) -{ - struct kbase_io_history *const h = data; - - *val = h->size; - - return 0; -} - -static int regs_history_size_set(void *data, u64 val) -{ - struct kbase_io_history *const h = data; - - return kbase_io_history_resize(h, (u16)val); -} - - -DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, - regs_history_size_get, - regs_history_size_set, - "%llu\n"); - - -/** - * regs_history_show - show callback for the register access history file. - * - * @sfile: The debugfs entry - * @data: Data associated with the entry - * - * This function is called to dump all recent accesses to the GPU registers. 
- * - * @return 0 if successfully prints data in debugfs entry file, failure - * otherwise - */ -static int regs_history_show(struct seq_file *sfile, void *data) -{ - struct kbase_io_history *const h = sfile->private; - u16 i; - size_t iters; - unsigned long flags; - - if (!h->enabled) { - seq_puts(sfile, "The register access history is disabled\n"); - goto out; - } - - spin_lock_irqsave(&h->lock, flags); - - iters = (h->size > h->count) ? h->count : h->size; - seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, - h->count); - for (i = 0; i < iters; ++i) { - struct kbase_io_access *io = - &h->buf[(h->count - iters + i) % h->size]; - char const access = (io->addr & 1) ? 'w' : 'r'; - - seq_printf(sfile, "%6i: %c: reg 0x%p val %08x\n", i, access, - (void *)(io->addr & ~0x1), io->value); - } - - spin_unlock_irqrestore(&h->lock, flags); - -out: - return 0; -} - - -/** - * regs_history_open - open operation for regs_history debugfs file - * - * @in: &struct inode pointer - * @file: &struct file pointer - * - * @return file descriptor - */ -static int regs_history_open(struct inode *in, struct file *file) -{ - return single_open(file, ®s_history_show, in->i_private); -} - - -static const struct file_operations regs_history_fops = { - .open = ®s_history_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - - -void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) -{ - debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, - kbdev->mali_debugfs_directory, - &kbdev->io_history.enabled); - debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, - kbdev->mali_debugfs_directory, - &kbdev->io_history, ®s_history_size_fops); - debugfs_create_file("regs_history", S_IRUGO, - kbdev->mali_debugfs_directory, &kbdev->io_history, - ®s_history_fops); -} - - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_regs_history_debugfs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_regs_history_debugfs.h deleted file mode 100755 index a0078cb8600d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_regs_history_debugfs.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Header file for register access history support via debugfs - * - * This interface is made available via /sys/kernel/debug/mali#/regs_history*. - * - * Usage: - * - regs_history_enabled: whether recording of register accesses is enabled. - * Write 'y' to enable, 'n' to disable. - * - regs_history_size: size of the register history buffer, must be > 0 - * - regs_history: return the information about last accesses to the registers. 
- */ - -#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H -#define _KBASE_REGS_HISTORY_DEBUGFS_H - -struct kbase_device; - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -/** - * kbasep_regs_history_debugfs_init - add debugfs entries for register history - * - * @kbdev: Pointer to kbase_device containing the register history - */ -void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); - -#else /* CONFIG_DEBUG_FS */ - -#define kbasep_regs_history_debugfs_init CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ - -#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_replay.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_replay.c deleted file mode 100755 index 92101fec8d5e..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_replay.c +++ /dev/null @@ -1,1156 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file mali_kbase_replay.c - * Replay soft job handlers - */ - -#include -#include -#include -#include -#include - -#define JOB_NOT_STARTED 0 -#define JOB_TYPE_NULL (1) -#define JOB_TYPE_VERTEX (5) -#define JOB_TYPE_TILER (7) -#define JOB_TYPE_FUSED (8) -#define JOB_TYPE_FRAGMENT (9) - -#define JOB_HEADER_32_FBD_OFFSET (31*4) -#define JOB_HEADER_64_FBD_OFFSET (44*4) - -#define FBD_POINTER_MASK (~0x3f) - -#define SFBD_TILER_OFFSET (48*4) - -#define MFBD_TILER_OFFSET (14*4) - -#define FBD_HIERARCHY_WEIGHTS 8 -#define FBD_HIERARCHY_MASK_MASK 0x1fff - -#define FBD_TYPE 1 - -#define HIERARCHY_WEIGHTS 13 - -#define JOB_HEADER_ID_MAX 0xffff - -#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) -#define JOB_POLYGON_LIST (0x03) - -struct fragment_job { - struct job_descriptor_header header; - - u32 x[2]; - union { - u64 _64; - u32 _32; - } fragment_fbd; -}; - -static void dump_job_head(struct kbase_context *kctx, char *head_str, - struct job_descriptor_header *job) -{ -#ifdef CONFIG_MALI_DEBUG - dev_dbg(kctx->kbdev->dev, "%s\n", head_str); - dev_dbg(kctx->kbdev->dev, - "addr = %p\n" - "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" - "first_incomplete_task = %x\n" - "fault_pointer = %llx\n" - "job_descriptor_size = %x\n" - "job_type = %x\n" - "job_barrier = %x\n" - "_reserved_01 = %x\n" - "_reserved_02 = %x\n" - "_reserved_03 = %x\n" - "_reserved_04/05 = %x,%x\n" - "job_index = %x\n" - "dependencies = %x,%x\n", - job, job->exception_status, - JOB_SOURCE_ID(job->exception_status), - (job->exception_status >> 8) & 0x3, - job->exception_status & 0xFF, - job->first_incomplete_task, - job->fault_pointer, job->job_descriptor_size, - job->job_type, job->job_barrier, job->_reserved_01, - job->_reserved_02, job->_reserved_03, - job->_reserved_04, job->_reserved_05, - job->job_index, - job->job_dependency_index_1, - job->job_dependency_index_2); - 
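Worked example of the exception_status packing printed by dump_job_head() above (illustrative; the value is made up): for exception_status = 0x00030242,

	source ID      = (0x00030242 >> 16) & 0xFFFF = 0x0003   (JOB_SOURCE_ID)
	access type    = (0x00030242 >> 8)  & 0x3    = 0x2
	exception code =  0x00030242        & 0xFF   = 0x42     (JOB_READ_FAULT, per kbase_exception_name())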
- if (job->job_descriptor_size) - dev_dbg(kctx->kbdev->dev, "next = %llx\n", - job->next_job._64); - else - dev_dbg(kctx->kbdev->dev, "next = %x\n", - job->next_job._32); -#endif -} - -static int kbasep_replay_reset_sfbd(struct kbase_context *kctx, - u64 fbd_address, u64 tiler_heap_free, - u16 hierarchy_mask, u32 default_weight) -{ - struct { - u32 padding_1[1]; - u32 flags; - u64 padding_2[2]; - u64 heap_free_address; - u32 padding[8]; - u32 weights[FBD_HIERARCHY_WEIGHTS]; - } *fbd_tiler; - struct kbase_vmap_struct map; - - dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); - - fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET, - sizeof(*fbd_tiler), &map); - if (!fbd_tiler) { - dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n"); - return -EINVAL; - } - -#ifdef CONFIG_MALI_DEBUG - dev_dbg(kctx->kbdev->dev, - "FBD tiler:\n" - "flags = %x\n" - "heap_free_address = %llx\n", - fbd_tiler->flags, fbd_tiler->heap_free_address); -#endif - if (hierarchy_mask) { - u32 weights[HIERARCHY_WEIGHTS]; - u16 old_hierarchy_mask = fbd_tiler->flags & - FBD_HIERARCHY_MASK_MASK; - int i, j = 0; - - for (i = 0; i < HIERARCHY_WEIGHTS; i++) { - if (old_hierarchy_mask & (1 << i)) { - KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); - weights[i] = fbd_tiler->weights[j++]; - } else { - weights[i] = default_weight; - } - } - - - dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", - old_hierarchy_mask, hierarchy_mask); - - for (i = 0; i < HIERARCHY_WEIGHTS; i++) - dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", - i, weights[i]); - - j = 0; - - for (i = 0; i < HIERARCHY_WEIGHTS; i++) { - if (hierarchy_mask & (1 << i)) { - KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); - - dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n", - i, weights[i], j); - - fbd_tiler->weights[j++] = weights[i]; - } - } - - for (; j < FBD_HIERARCHY_WEIGHTS; j++) - fbd_tiler->weights[j] = 0; - - fbd_tiler->flags = hierarchy_mask | (1 << 16); - } - - fbd_tiler->heap_free_address = tiler_heap_free; - - dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n", - fbd_tiler->heap_free_address, fbd_tiler->flags); - - kbase_vunmap(kctx, &map); - - return 0; -} - -static int kbasep_replay_reset_mfbd(struct kbase_context *kctx, - u64 fbd_address, u64 tiler_heap_free, - u16 hierarchy_mask, u32 default_weight) -{ - struct kbase_vmap_struct map; - struct { - u32 padding_0; - u32 flags; - u64 padding_1[2]; - u64 heap_free_address; - u64 padding_2; - u32 weights[FBD_HIERARCHY_WEIGHTS]; - } *fbd_tiler; - - dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); - - fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET, - sizeof(*fbd_tiler), &map); - if (!fbd_tiler) { - dev_err(kctx->kbdev->dev, - "kbasep_replay_reset_fbd: failed to map fbd\n"); - return -EINVAL; - } - -#ifdef CONFIG_MALI_DEBUG - dev_dbg(kctx->kbdev->dev, "FBD tiler:\n" - "flags = %x\n" - "heap_free_address = %llx\n", - fbd_tiler->flags, - fbd_tiler->heap_free_address); -#endif - if (hierarchy_mask) { - u32 weights[HIERARCHY_WEIGHTS]; - u16 old_hierarchy_mask = (fbd_tiler->flags) & - FBD_HIERARCHY_MASK_MASK; - int i, j = 0; - - for (i = 0; i < HIERARCHY_WEIGHTS; i++) { - if (old_hierarchy_mask & (1 << i)) { - KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); - weights[i] = fbd_tiler->weights[j++]; - } else { - weights[i] = default_weight; - } - } - - - dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", - old_hierarchy_mask, hierarchy_mask); - - for (i = 0; i 
< HIERARCHY_WEIGHTS; i++) - dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", - i, weights[i]); - - j = 0; - - for (i = 0; i < HIERARCHY_WEIGHTS; i++) { - if (hierarchy_mask & (1 << i)) { - KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); - - dev_dbg(kctx->kbdev->dev, - " Writing hierarchy level %02d (%08x) to %d\n", - i, weights[i], j); - - fbd_tiler->weights[j++] = weights[i]; - } - } - - for (; j < FBD_HIERARCHY_WEIGHTS; j++) - fbd_tiler->weights[j] = 0; - - fbd_tiler->flags = hierarchy_mask | (1 << 16); - } - - fbd_tiler->heap_free_address = tiler_heap_free; - - kbase_vunmap(kctx, &map); - - return 0; -} - -/** - * @brief Reset the status of an FBD pointed to by a tiler job - * - * This performs two functions : - * - Set the hierarchy mask - * - Reset the tiler free heap address - * - * @param[in] kctx Context pointer - * @param[in] job_header Address of job header to reset. - * @param[in] tiler_heap_free The value to reset Tiler Heap Free to - * @param[in] hierarchy_mask The hierarchy mask to use - * @param[in] default_weight Default hierarchy weight to write when no other - * weight is given in the FBD - * @param[in] job_64 true if this job is using 64-bit - * descriptors - * - * @return 0 on success, error code on failure - */ -static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx, - u64 job_header, u64 tiler_heap_free, - u16 hierarchy_mask, u32 default_weight, bool job_64) -{ - struct kbase_vmap_struct map; - u64 fbd_address; - - if (job_64) { - u64 *job_ext; - - job_ext = kbase_vmap(kctx, - job_header + JOB_HEADER_64_FBD_OFFSET, - sizeof(*job_ext), &map); - - if (!job_ext) { - dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); - return -EINVAL; - } - - fbd_address = *job_ext; - - kbase_vunmap(kctx, &map); - } else { - u32 *job_ext; - - job_ext = kbase_vmap(kctx, - job_header + JOB_HEADER_32_FBD_OFFSET, - sizeof(*job_ext), &map); - - if (!job_ext) { - dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); - return -EINVAL; - } - - fbd_address = *job_ext; - - kbase_vunmap(kctx, &map); - } - - if (fbd_address & FBD_TYPE) { - return kbasep_replay_reset_mfbd(kctx, - fbd_address & FBD_POINTER_MASK, - tiler_heap_free, - hierarchy_mask, - default_weight); - } else { - return kbasep_replay_reset_sfbd(kctx, - fbd_address & FBD_POINTER_MASK, - tiler_heap_free, - hierarchy_mask, - default_weight); - } -} - -/** - * @brief Reset the status of a job - * - * This performs the following functions : - * - * - Reset the Job Status field of each job to NOT_STARTED. - * - Set the Job Type field of any Vertex Jobs to Null Job. - * - For any jobs using an FBD, set the Tiler Heap Free field to the value of - * the tiler_heap_free parameter, and set the hierarchy level mask to the - * hier_mask parameter. - * - Offset HW dependencies by the hw_job_id_offset parameter - * - Set the Perform Job Barrier flag if this job is the first in the chain - * - Read the address of the next job header - * - * @param[in] kctx Context pointer - * @param[in,out] job_header Address of job header to reset. Set to address - * of next job header on exit. - * @param[in] prev_jc Previous job chain to link to, if this job is - * the last in the chain. 
- * @param[in] hw_job_id_offset Offset for HW job IDs - * @param[in] tiler_heap_free The value to reset Tiler Heap Free to - * @param[in] hierarchy_mask The hierarchy mask to use - * @param[in] default_weight Default hierarchy weight to write when no other - * weight is given in the FBD - * @param[in] first_in_chain true if this job is the first in the chain - * @param[in] fragment_chain true if this job is in the fragment chain - * - * @return 0 on success, error code on failure - */ -static int kbasep_replay_reset_job(struct kbase_context *kctx, - u64 *job_header, u64 prev_jc, - u64 tiler_heap_free, u16 hierarchy_mask, - u32 default_weight, u16 hw_job_id_offset, - bool first_in_chain, bool fragment_chain) -{ - struct fragment_job *frag_job; - struct job_descriptor_header *job; - u64 new_job_header; - struct kbase_vmap_struct map; - - frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); - if (!frag_job) { - dev_err(kctx->kbdev->dev, - "kbasep_replay_parse_jc: failed to map jc\n"); - return -EINVAL; - } - job = &frag_job->header; - - dump_job_head(kctx, "Job header:", job); - - if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { - dev_err(kctx->kbdev->dev, "Job already not started\n"); - goto out_unmap; - } - job->exception_status = JOB_NOT_STARTED; - - if (job->job_type == JOB_TYPE_VERTEX) - job->job_type = JOB_TYPE_NULL; - - if (job->job_type == JOB_TYPE_FUSED) { - dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); - goto out_unmap; - } - - if (first_in_chain) - job->job_barrier = 1; - - if ((job->job_dependency_index_1 + hw_job_id_offset) > - JOB_HEADER_ID_MAX || - (job->job_dependency_index_2 + hw_job_id_offset) > - JOB_HEADER_ID_MAX || - (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { - dev_err(kctx->kbdev->dev, - "Job indicies/dependencies out of valid range\n"); - goto out_unmap; - } - - if (job->job_dependency_index_1) - job->job_dependency_index_1 += hw_job_id_offset; - if (job->job_dependency_index_2) - job->job_dependency_index_2 += hw_job_id_offset; - - job->job_index += hw_job_id_offset; - - if (job->job_descriptor_size) { - new_job_header = job->next_job._64; - if (!job->next_job._64) - job->next_job._64 = prev_jc; - } else { - new_job_header = job->next_job._32; - if (!job->next_job._32) - job->next_job._32 = prev_jc; - } - dump_job_head(kctx, "Updated to:", job); - - if (job->job_type == JOB_TYPE_TILER) { - bool job_64 = job->job_descriptor_size != 0; - - if (kbasep_replay_reset_tiler_job(kctx, *job_header, - tiler_heap_free, hierarchy_mask, - default_weight, job_64) != 0) - goto out_unmap; - - } else if (job->job_type == JOB_TYPE_FRAGMENT) { - u64 fbd_address; - - if (job->job_descriptor_size) - fbd_address = frag_job->fragment_fbd._64; - else - fbd_address = (u64)frag_job->fragment_fbd._32; - - if (fbd_address & FBD_TYPE) { - if (kbasep_replay_reset_mfbd(kctx, - fbd_address & FBD_POINTER_MASK, - tiler_heap_free, - hierarchy_mask, - default_weight) != 0) - goto out_unmap; - } else { - if (kbasep_replay_reset_sfbd(kctx, - fbd_address & FBD_POINTER_MASK, - tiler_heap_free, - hierarchy_mask, - default_weight) != 0) - goto out_unmap; - } - } - - kbase_vunmap(kctx, &map); - - *job_header = new_job_header; - - return 0; - -out_unmap: - kbase_vunmap(kctx, &map); - return -EINVAL; -} - -/** - * @brief Find the highest job ID in a job chain - * - * @param[in] kctx Context pointer - * @param[in] jc Job chain start address - * @param[out] hw_job_id Highest job ID in chain - * - * @return 0 on success, error code on failure - */ 
-static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, - u64 jc, u16 *hw_job_id) -{ - while (jc) { - struct job_descriptor_header *job; - struct kbase_vmap_struct map; - - dev_dbg(kctx->kbdev->dev, - "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc); - - job = kbase_vmap(kctx, jc, sizeof(*job), &map); - if (!job) { - dev_err(kctx->kbdev->dev, "failed to map jc\n"); - - return -EINVAL; - } - - if (job->job_index > *hw_job_id) - *hw_job_id = job->job_index; - - if (job->job_descriptor_size) - jc = job->next_job._64; - else - jc = job->next_job._32; - - kbase_vunmap(kctx, &map); - } - - return 0; -} - -/** - * @brief Reset the status of a number of jobs - * - * This function walks the provided job chain, and calls - * kbasep_replay_reset_job for each job. It also links the job chain to the - * provided previous job chain. - * - * The function will fail if any of the jobs passed already have status of - * NOT_STARTED. - * - * @param[in] kctx Context pointer - * @param[in] jc Job chain to be processed - * @param[in] prev_jc Job chain to be added to. May be NULL - * @param[in] tiler_heap_free The value to reset Tiler Heap Free to - * @param[in] hierarchy_mask The hierarchy mask to use - * @param[in] default_weight Default hierarchy weight to write when no other - * weight is given in the FBD - * @param[in] hw_job_id_offset Offset for HW job IDs - * @param[in] fragment_chain true if this chain is the fragment chain - * - * @return 0 on success, error code otherwise - */ -static int kbasep_replay_parse_jc(struct kbase_context *kctx, - u64 jc, u64 prev_jc, - u64 tiler_heap_free, u16 hierarchy_mask, - u32 default_weight, u16 hw_job_id_offset, - bool fragment_chain) -{ - bool first_in_chain = true; - int nr_jobs = 0; - - dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n", - jc, hw_job_id_offset); - - while (jc) { - dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc); - - if (kbasep_replay_reset_job(kctx, &jc, prev_jc, - tiler_heap_free, hierarchy_mask, - default_weight, hw_job_id_offset, - first_in_chain, fragment_chain) != 0) - return -EINVAL; - - first_in_chain = false; - - nr_jobs++; - if (fragment_chain && - nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) { - dev_err(kctx->kbdev->dev, - "Exceeded maximum number of jobs in fragment chain\n"); - return -EINVAL; - } - } - - return 0; -} - -/** - * @brief Reset the status of a replay job, and set up dependencies - * - * This performs the actions to allow the replay job to be re-run following - * completion of the passed dependency. - * - * @param[in] katom The atom to be reset - * @param[in] dep_atom The dependency to be attached to the atom - */ -static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom, - struct kbase_jd_atom *dep_atom) -{ - katom->status = KBASE_JD_ATOM_STATE_QUEUED; - kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA); - list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]); -} - -/** - * @brief Allocate an unused katom - * - * This will search the provided context for an unused katom, and will mark it - * as KBASE_JD_ATOM_STATE_QUEUED. - * - * If no atoms are available then the function will fail. 
- * - * @param[in] kctx Context pointer - * @return An atom ID, or -1 on failure - */ -static int kbasep_allocate_katom(struct kbase_context *kctx) -{ - struct kbase_jd_context *jctx = &kctx->jctx; - int i; - - for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { - if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { - jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kctx->kbdev->dev, - "kbasep_allocate_katom: Allocated atom %d\n", - i); - return i; - } - } - - return -1; -} - -/** - * @brief Release a katom - * - * This will mark the provided atom as available, and remove any dependencies. - * - * For use on error path. - * - * @param[in] kctx Context pointer - * @param[in] atom_id ID of atom to release - */ -static void kbasep_release_katom(struct kbase_context *kctx, int atom_id) -{ - struct kbase_jd_context *jctx = &kctx->jctx; - - dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n", - atom_id); - - while (!list_empty(&jctx->atoms[atom_id].dep_head[0])) - list_del(jctx->atoms[atom_id].dep_head[0].next); - - while (!list_empty(&jctx->atoms[atom_id].dep_head[1])) - list_del(jctx->atoms[atom_id].dep_head[1].next); - - jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED; -} - -static void kbasep_replay_create_atom(struct kbase_context *kctx, - struct base_jd_atom_v2 *atom, - int atom_nr, - base_jd_prio prio) -{ - atom->nr_extres = 0; - atom->extres_list = 0; - atom->device_nr = 0; - atom->prio = prio; - atom->atom_number = atom_nr; - - base_jd_atom_dep_set(&atom->pre_dep[0], 0, BASE_JD_DEP_TYPE_INVALID); - base_jd_atom_dep_set(&atom->pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID); - - atom->udata.blob[0] = 0; - atom->udata.blob[1] = 0; -} - -/** - * @brief Create two atoms for the purpose of replaying jobs - * - * Two atoms are allocated and created. The jc pointer is not set at this - * stage. The second atom has a dependency on the first. The remaining fields - * are set up as follows : - * - * - No external resources. Any required external resources will be held by the - * replay atom. - * - device_nr is set to 0. This is not relevant as - * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. - * - Priority is inherited from the replay job. 
- * - * @param[out] t_atom Atom to use for tiler jobs - * @param[out] f_atom Atom to use for fragment jobs - * @param[in] prio Priority of new atom (inherited from replay soft - * job) - * @return 0 on success, error code on failure - */ -static int kbasep_replay_create_atoms(struct kbase_context *kctx, - struct base_jd_atom_v2 *t_atom, - struct base_jd_atom_v2 *f_atom, - base_jd_prio prio) -{ - int t_atom_nr, f_atom_nr; - - t_atom_nr = kbasep_allocate_katom(kctx); - if (t_atom_nr < 0) { - dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); - return -EINVAL; - } - - f_atom_nr = kbasep_allocate_katom(kctx); - if (f_atom_nr < 0) { - dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); - kbasep_release_katom(kctx, t_atom_nr); - return -EINVAL; - } - - kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio); - kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio); - - base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr, - BASE_JD_DEP_TYPE_DATA); - - return 0; -} - -#ifdef CONFIG_MALI_DEBUG -static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload) -{ - u64 next; - - dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n"); - next = payload->tiler_jc_list; - - while (next) { - struct kbase_vmap_struct map; - base_jd_replay_jc *jc_struct; - - jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map); - - if (!jc_struct) - return; - - dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n", - jc_struct, jc_struct->jc, jc_struct->next); - - next = jc_struct->next; - - kbase_vunmap(kctx, &map); - } -} -#endif - -/** - * @brief Parse a base_jd_replay_payload provided by userspace - * - * This will read the payload from userspace, and parse the job chains. - * - * @param[in] kctx Context pointer - * @param[in] replay_atom Replay soft job atom - * @param[in] t_atom Atom to use for tiler jobs - * @param[in] f_atom Atom to use for fragment jobs - * @return 0 on success, error code on failure - */ -static int kbasep_replay_parse_payload(struct kbase_context *kctx, - struct kbase_jd_atom *replay_atom, - struct base_jd_atom_v2 *t_atom, - struct base_jd_atom_v2 *f_atom) -{ - base_jd_replay_payload *payload = NULL; - u64 next; - u64 prev_jc = 0; - u16 hw_job_id_offset = 0; - int ret = -EINVAL; - struct kbase_vmap_struct map; - - dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n", - replay_atom->jc, sizeof(payload)); - - payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); - if (!payload) { - dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); - return -EINVAL; - } - -#ifdef CONFIG_MALI_DEBUG - dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); - dev_dbg(kctx->kbdev->dev, "Payload structure:\n" - "tiler_jc_list = %llx\n" - "fragment_jc = %llx\n" - "tiler_heap_free = %llx\n" - "fragment_hierarchy_mask = %x\n" - "tiler_hierarchy_mask = %x\n" - "hierarchy_default_weight = %x\n" - "tiler_core_req = %x\n" - "fragment_core_req = %x\n", - payload->tiler_jc_list, - payload->fragment_jc, - payload->tiler_heap_free, - payload->fragment_hierarchy_mask, - payload->tiler_hierarchy_mask, - payload->hierarchy_default_weight, - payload->tiler_core_req, - payload->fragment_core_req); - payload_dump(kctx, payload); -#endif - t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; - f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; - - /* Sanity check core requirements*/ - if ((t_atom->core_req & 
BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T || - (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS || - t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || - f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - - int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; - int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; - int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; - int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; - - if (t_atom_type != BASE_JD_REQ_T) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x", - t_atom_type, BASE_JD_REQ_T); - } - if (f_atom_type != BASE_JD_REQ_FS) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. Was 0x%x Expected: 0x%x\n", - f_atom_type, BASE_JD_REQ_FS); - } - if (t_has_ex_res) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n"); - } - if (f_has_ex_res) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n"); - } - - goto out; - } - - /* Process tiler job chains */ - next = payload->tiler_jc_list; - if (!next) { - dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n"); - goto out; - } - - while (next) { - base_jd_replay_jc *jc_struct; - struct kbase_vmap_struct jc_map; - u64 jc; - - jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map); - - if (!jc_struct) { - dev_err(kctx->kbdev->dev, "Failed to map jc struct\n"); - goto out; - } - - jc = jc_struct->jc; - next = jc_struct->next; - if (next) - jc_struct->jc = 0; - - kbase_vunmap(kctx, &jc_map); - - if (jc) { - u16 max_hw_job_id = 0; - - if (kbasep_replay_find_hw_job_id(kctx, jc, - &max_hw_job_id) != 0) - goto out; - - if (kbasep_replay_parse_jc(kctx, jc, prev_jc, - payload->tiler_heap_free, - payload->tiler_hierarchy_mask, - payload->hierarchy_default_weight, - hw_job_id_offset, false) != 0) { - goto out; - } - - hw_job_id_offset += max_hw_job_id; - - prev_jc = jc; - } - } - t_atom->jc = prev_jc; - - /* Process fragment job chain */ - f_atom->jc = payload->fragment_jc; - if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0, - payload->tiler_heap_free, - payload->fragment_hierarchy_mask, - payload->hierarchy_default_weight, 0, - true) != 0) { - goto out; - } - - if (!t_atom->jc || !f_atom->jc) { - dev_err(kctx->kbdev->dev, "Invalid payload\n"); - goto out; - } - - dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n", - t_atom->jc, f_atom->jc); - ret = 0; - -out: - kbase_vunmap(kctx, &map); - - return ret; -} - -static void kbase_replay_process_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom; - struct kbase_context *kctx; - struct kbase_jd_context *jctx; - bool need_to_try_schedule_context = false; - - struct base_jd_atom_v2 t_atom, f_atom; - struct kbase_jd_atom *t_katom, *f_katom; - base_jd_prio atom_prio; - - katom = container_of(data, struct kbase_jd_atom, work); - kctx = katom->kctx; - jctx = &kctx->jctx; - - mutex_lock(&jctx->lock); - - atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority); - - if (kbasep_replay_create_atoms( - kctx, &t_atom, &f_atom, atom_prio) != 0) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - goto out; - } - - t_katom = &jctx->atoms[t_atom.atom_number]; - f_katom = &jctx->atoms[f_atom.atom_number]; - - if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) { 
- kbasep_release_katom(kctx, t_atom.atom_number); - kbasep_release_katom(kctx, f_atom.atom_number); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - goto out; - } - - kbasep_replay_reset_softjob(katom, f_katom); - - need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom); - if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { - dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); - kbasep_release_katom(kctx, f_atom.atom_number); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - goto out; - } - need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom); - if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { - dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - goto out; - } - - katom->event_code = BASE_JD_EVENT_DONE; - -out: - if (katom->event_code != BASE_JD_EVENT_DONE) { - kbase_disjoint_state_down(kctx->kbdev); - - need_to_try_schedule_context |= jd_done_nolock(katom, NULL); - } - - if (need_to_try_schedule_context) - kbase_js_sched_all(kctx->kbdev); - - mutex_unlock(&jctx->lock); -} - -/** - * @brief Check job replay fault - * - * This will read the job payload, checks fault type and source, then decides - * whether replay is required. - * - * @param[in] katom The atom to be processed - * @return true (success) if replay required or false on failure. - */ -static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct device *dev = kctx->kbdev->dev; - base_jd_replay_payload *payload; - u64 job_header; - u64 job_loop_detect; - struct job_descriptor_header *job; - struct kbase_vmap_struct job_map; - struct kbase_vmap_struct map; - bool err = false; - - /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or - * if force_replay is enabled. - */ - if (BASE_JD_EVENT_TERMINATED == katom->event_code) { - return false; - } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) { - return true; - } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) { - katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT; - return true; - } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) { - /* No replay for faults of type other than - * BASE_JD_EVENT_DATA_INVALID_FAULT. - */ - return false; - } - - /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc - * to find out whether the source of exception is POLYGON_LIST. Replay - * is required if the source of fault is POLYGON_LIST. 
- */ - payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map); - if (!payload) { - dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n"); - return false; - } - -#ifdef CONFIG_MALI_DEBUG - dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload); - dev_dbg(dev, "\nPayload structure:\n" - "fragment_jc = 0x%llx\n" - "fragment_hierarchy_mask = 0x%x\n" - "fragment_core_req = 0x%x\n", - payload->fragment_jc, - payload->fragment_hierarchy_mask, - payload->fragment_core_req); -#endif - /* Process fragment job chain */ - job_header = (u64) payload->fragment_jc; - job_loop_detect = job_header; - while (job_header) { - job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map); - if (!job) { - dev_err(dev, "failed to map jc\n"); - /* unmap payload*/ - kbase_vunmap(kctx, &map); - return false; - } - - - dump_job_head(kctx, "\njob_head structure:\n", job); - - /* Replay only when the polygon list reader caused the - * DATA_INVALID_FAULT */ - if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && - (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { - err = true; - kbase_vunmap(kctx, &job_map); - break; - } - - /* Move on to next fragment job in the list */ - if (job->job_descriptor_size) - job_header = job->next_job._64; - else - job_header = job->next_job._32; - - kbase_vunmap(kctx, &job_map); - - /* Job chain loop detected */ - if (job_header == job_loop_detect) - break; - } - - /* unmap payload*/ - kbase_vunmap(kctx, &map); - - return err; -} - - -/** - * @brief Process a replay job - * - * Called from kbase_process_soft_job. - * - * On exit, if the job has completed, katom->event_code will have been updated. - * If the job has not completed, and is replaying jobs, then the atom status - * will have been reset to KBASE_JD_ATOM_STATE_QUEUED. - * - * @param[in] katom The atom to be processed - * @return false if the atom has completed - * true if the atom is replaying jobs - */ -bool kbase_replay_process(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct kbase_device *kbdev = kctx->kbdev; - - /* Don't replay this atom if these issues are not present in the - * hardware */ - if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11020) && - !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11024)) { - dev_dbg(kbdev->dev, "Hardware does not need replay workaround"); - - /* Signal failure to userspace */ - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - - return false; - } - - if (katom->event_code == BASE_JD_EVENT_DONE) { - dev_dbg(kbdev->dev, "Previous job succeeded - not replaying\n"); - - if (katom->retry_count) - kbase_disjoint_state_down(kbdev); - - return false; - } - - if (kbase_ctx_flag(kctx, KCTX_DYING)) { - dev_dbg(kbdev->dev, "Not replaying; context is dying\n"); - - if (katom->retry_count) - kbase_disjoint_state_down(kbdev); - - return false; - } - - /* Check job exception type and source before replaying. */ - if (!kbase_replay_fault_check(katom)) { - dev_dbg(kbdev->dev, - "Replay cancelled on event %x\n", katom->event_code); - /* katom->event_code is already set to the failure code of the - * previous job. 
- */ - return false; - } - - dev_warn(kbdev->dev, "Replaying jobs retry=%d\n", - katom->retry_count); - - katom->retry_count++; - - if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) { - dev_err(kbdev->dev, "Replay exceeded limit - failing jobs\n"); - - kbase_disjoint_state_down(kbdev); - - /* katom->event_code is already set to the failure code of the - previous job */ - return false; - } - - /* only enter the disjoint state once for the whole time while the replay is ongoing */ - if (katom->retry_count == 1) - kbase_disjoint_state_up(kbdev); - - INIT_WORK(&katom->work, kbase_replay_process_worker); - queue_work(kctx->event_workq, &katom->work); - - return true; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_smc.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_smc.c deleted file mode 100755 index 2176479959a5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_smc.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifdef CONFIG_ARM64 - -#include -#include - -#include - -static noinline u64 invoke_smc_fid(u64 function_id, - u64 arg0, u64 arg1, u64 arg2) -{ - register u64 x0 asm("x0") = function_id; - register u64 x1 asm("x1") = arg0; - register u64 x2 asm("x2") = arg1; - register u64 x3 asm("x3") = arg2; - - asm volatile( - __asmeq("%0", "x0") - __asmeq("%1", "x1") - __asmeq("%2", "x2") - __asmeq("%3", "x3") - "smc #0\n" - : "+r" (x0) - : "r" (x1), "r" (x2), "r" (x3)); - - return x0; -} - -u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) -{ - /* Is fast call (bit 31 set) */ - KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); - /* bits 16-23 must be zero for fast calls */ - KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); - - return invoke_smc_fid(fid, arg0, arg1, arg2); -} - -u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, - u64 arg0, u64 arg1, u64 arg2) -{ - u32 fid = 0; - - /* Only the six bits allowed should be used. */ - KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); - - fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ - if (smc64) - fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ - fid |= oen; /* Bit 29:24: OEN */ - /* Bit 23:16: Must be zero for fast calls */ - fid |= (function_number); /* Bit 15:0: function number */ - - return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); -} - -#endif /* CONFIG_ARM64 */ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_smc.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_smc.h deleted file mode 100755 index 221eb21a8c7f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_smc.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#ifndef _KBASE_SMC_H_ -#define _KBASE_SMC_H_ - -#ifdef CONFIG_ARM64 - -#include - -#define SMC_FAST_CALL (1 << 31) -#define SMC_64 (1 << 30) - -#define SMC_OEN_OFFSET 24 -#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ -#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) -#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) - - -/** - * kbase_invoke_smc_fid - Perform a secure monitor call - * @fid: The SMC function to call, see SMC Calling convention. - * @arg0: First argument to the SMC. - * @arg1: Second argument to the SMC. - * @arg2: Third argument to the SMC. - * - * See SMC Calling Convention for details. - * - * Return: the return value from the SMC. - */ -u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); - -/** - * kbase_invoke_smc_fid - Perform a secure monitor call - * @oen: Owning Entity number (SIP, STD etc). - * @function_number: The function number within the OEN. - * @smc64: use SMC64 calling convention instead of SMC32. - * @arg0: First argument to the SMC. - * @arg1: Second argument to the SMC. - * @arg2: Third argument to the SMC. - * - * See SMC Calling Convention for details. - * - * Return: the return value from the SMC call. - */ -u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, - u64 arg0, u64 arg1, u64 arg2); - -#endif /* CONFIG_ARM64 */ - -#endif /* _KBASE_SMC_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_softjobs.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_softjobs.c deleted file mode 100755 index e762af4a2bcb..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_softjobs.c +++ /dev/null @@ -1,1686 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include - -#if defined(CONFIG_DMA_SHARED_BUFFER) -#include -#include -#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * @file mali_kbase_softjobs.c - * - * This file implements the logic behind software only jobs that are - * executed within the driver rather than being handed over to the GPU. - */ - -static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_del(&katom->queue); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - /* Record the start time of this atom so we could cancel it at - * the right time. - */ - katom->start_timestamp = ktime_get(); - - /* Add the atom to the waiting list before the timer is - * (re)started to make sure that it gets processed. - */ - kbasep_add_waiting_soft_job(katom); - - /* Schedule timeout of this atom after a period if it is not active */ - if (!timer_pending(&kctx->soft_job_timeout)) { - int timeout_ms = atomic_read( - &kctx->kbdev->js_data.soft_job_timeout_ms); - mod_timer(&kctx->soft_job_timeout, - jiffies + msecs_to_jiffies(timeout_ms)); - } -} - -static int kbasep_read_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char *status) -{ - unsigned char *mapped_evt; - struct kbase_vmap_struct map; - - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); - if (!mapped_evt) - return -EFAULT; - - *status = *mapped_evt; - - kbase_vunmap(kctx, &map); - - return 0; -} - -static int kbasep_write_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char new_status) -{ - unsigned char *mapped_evt; - struct kbase_vmap_struct map; - - if ((new_status != BASE_JD_SOFT_EVENT_SET) && - (new_status != BASE_JD_SOFT_EVENT_RESET)) - return -EINVAL; - - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); - if (!mapped_evt) - return -EFAULT; - - *mapped_evt = new_status; - - kbase_vunmap(kctx, &map); - - return 0; -} - -static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) -{ - struct kbase_vmap_struct map; - void *user_result; - struct timespec ts; - struct base_dump_cpu_gpu_counters data; - u64 system_time; - u64 cycle_counter; - u64 jc = katom->jc; - struct kbase_context *kctx = katom->kctx; - int pm_active_err; - - memset(&data, 0, sizeof(data)); - - /* Take the PM active reference as late as possible - otherwise, it could - * delay suspend until we process the atom (which may be at the end of a - * long chain of dependencies */ - pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); - if (pm_active_err) { - struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; - - /* We're suspended - queue this on the list of suspended jobs - * Use dep_item[1], because dep_item[0] was previously 
in use - * for 'waiting_soft_jobs'. - */ - mutex_lock(&js_devdata->runpool_mutex); - list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); - mutex_unlock(&js_devdata->runpool_mutex); - - /* Also adding this to the list of waiting soft job */ - kbasep_add_waiting_soft_job(katom); - - return pm_active_err; - } - - kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, - &ts); - - kbase_pm_context_idle(kctx->kbdev); - - data.sec = ts.tv_sec; - data.usec = ts.tv_nsec / 1000; - data.system_time = system_time; - data.cycle_counter = cycle_counter; - - /* Assume this atom will be cancelled until we know otherwise */ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - /* GPU_WR access is checked on the range for returning the result to - * userspace for the following reasons: - * - security, this is currently how imported user bufs are checked. - * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ - user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); - if (!user_result) - return 0; - - memcpy(user_result, &data, sizeof(data)); - - kbase_vunmap(kctx, &map); - - /* Atom was fine - mark it as done */ - katom->event_code = BASE_JD_EVENT_DONE; - - return 0; -} - -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) -/* Called by the explicit fence mechanism when a fence wait has completed */ -void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - mutex_lock(&kctx->jctx.lock); - kbasep_remove_waiting_soft_job(katom); - kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(kctx->kbdev); - mutex_unlock(&kctx->jctx.lock); -} -#endif - -static void kbasep_soft_event_complete_job(struct work_struct *work) -{ - struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, - work); - struct kbase_context *kctx = katom->kctx; - int resched; - - mutex_lock(&kctx->jctx.lock); - resched = jd_done_nolock(katom, NULL); - mutex_unlock(&kctx->jctx.lock); - - if (resched) - kbase_js_sched_all(kctx->kbdev); -} - -void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) -{ - int cancel_timer = 1; - struct list_head *entry, *tmp; - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry( - entry, struct kbase_jd_atom, queue); - - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_EVENT_WAIT: - if (katom->jc == evt) { - list_del(&katom->queue); - - katom->event_code = BASE_JD_EVENT_DONE; - INIT_WORK(&katom->work, - kbasep_soft_event_complete_job); - queue_work(kctx->jctx.job_done_wq, - &katom->work); - } else { - /* There are still other waiting jobs, we cannot - * cancel the timer yet. - */ - cancel_timer = 0; - } - break; -#ifdef CONFIG_MALI_FENCE_DEBUG - case BASE_JD_REQ_SOFT_FENCE_WAIT: - /* Keep the timer running if fence debug is enabled and - * there are waiting fence jobs. 
- */ - cancel_timer = 0; - break; -#endif - } - } - - if (cancel_timer) - del_timer(&kctx->soft_job_timeout); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -#ifdef CONFIG_MALI_FENCE_DEBUG -static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct device *dev = kctx->kbdev->dev; - int i; - - for (i = 0; i < 2; i++) { - struct kbase_jd_atom *dep; - - list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { - if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || - dep->status == KBASE_JD_ATOM_STATE_COMPLETED) - continue; - - if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { - /* Found blocked trigger fence. */ - struct kbase_sync_fence_info info; - - if (!kbase_sync_fence_in_info_get(dep, &info)) { - dev_warn(dev, - "\tVictim trigger atom %d fence [%p] %s: %s\n", - kbase_jd_atom_id(kctx, dep), - info.fence, - info.name, - kbase_sync_status_string(info.status)); - } - } - - kbase_fence_debug_check_atom(dep); - } - } -} - -static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct device *dev = katom->kctx->kbdev->dev; - int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); - unsigned long lflags; - struct kbase_sync_fence_info info; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - - if (kbase_sync_fence_in_info_get(katom, &info)) { - /* Fence must have signaled just after timeout. */ - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); - return; - } - - dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", - kctx->tgid, kctx->id, - kbase_jd_atom_id(kctx, katom), - info.fence, timeout_ms); - dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", - info.fence, info.name, - kbase_sync_status_string(info.status)); - - /* Search for blocked trigger atoms */ - kbase_fence_debug_check_atom(katom); - - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); - - kbase_sync_fence_in_dump(katom); -} - -struct kbase_fence_debug_work { - struct kbase_jd_atom *katom; - struct work_struct work; -}; - -static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) -{ - struct kbase_fence_debug_work *w = container_of(work, - struct kbase_fence_debug_work, work); - struct kbase_jd_atom *katom = w->katom; - struct kbase_context *kctx = katom->kctx; - - mutex_lock(&kctx->jctx.lock); - kbase_fence_debug_wait_timeout(katom); - mutex_unlock(&kctx->jctx.lock); - - kfree(w); -} - -static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) -{ - struct kbase_fence_debug_work *work; - struct kbase_context *kctx = katom->kctx; - - /* Enqueue fence debug worker. Use job_done_wq to get - * debug print ordered with job completion. - */ - work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); - /* Ignore allocation failure. 
*/ - if (work) { - work->katom = katom; - INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); - queue_work(kctx->jctx.job_done_wq, &work->work); - } -} -#endif /* CONFIG_MALI_FENCE_DEBUG */ - -void kbasep_soft_job_timeout_worker(struct timer_list *timer) -{ - struct kbase_context *kctx = container_of(timer, struct kbase_context, - soft_job_timeout); - u32 timeout_ms = (u32)atomic_read( - &kctx->kbdev->js_data.soft_job_timeout_ms); - ktime_t cur_time = ktime_get(); - bool restarting = false; - unsigned long lflags; - struct list_head *entry, *tmp; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry(entry, - struct kbase_jd_atom, queue); - s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, - katom->start_timestamp)); - - if (elapsed_time < (s64)timeout_ms) { - restarting = true; - continue; - } - - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_EVENT_WAIT: - /* Take it out of the list to ensure that it - * will be cancelled in all cases - */ - list_del(&katom->queue); - - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - INIT_WORK(&katom->work, kbasep_soft_event_complete_job); - queue_work(kctx->jctx.job_done_wq, &katom->work); - break; -#ifdef CONFIG_MALI_FENCE_DEBUG - case BASE_JD_REQ_SOFT_FENCE_WAIT: - kbase_fence_debug_timeout(katom); - break; -#endif - } - } - - if (restarting) - mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - unsigned char status; - - /* The status of this soft-job is stored in jc */ - if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - return 0; - } - - if (status == BASE_JD_SOFT_EVENT_SET) - return 0; /* Event already set, nothing to do */ - - kbasep_add_waiting_with_timeout(katom); - - return 1; -} - -static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, - unsigned char new_status) -{ - /* Complete jobs waiting on the same event */ - struct kbase_context *kctx = katom->kctx; - - if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - return; - } - - if (new_status == BASE_JD_SOFT_EVENT_SET) - kbasep_complete_triggered_soft_events(kctx, katom->jc); -} - -/** - * kbase_soft_event_update() - Update soft event state - * @kctx: Pointer to context - * @event: Event to update - * @new_status: New status value of event - * - * Update the event, and wake up any atoms waiting for the event. - * - * Return: 0 on success, a negative error code on failure. 
- */ -int kbase_soft_event_update(struct kbase_context *kctx, - u64 event, - unsigned char new_status) -{ - int err = 0; - - mutex_lock(&kctx->jctx.lock); - - if (kbasep_write_soft_event_status(kctx, event, new_status)) { - err = -ENOENT; - goto out; - } - - if (new_status == BASE_JD_SOFT_EVENT_SET) - kbasep_complete_triggered_soft_events(kctx, event); - -out: - mutex_unlock(&kctx->jctx.lock); - - return err; -} - -static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) -{ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(katom->kctx->kbdev); -} - -static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) -{ - struct kbase_debug_copy_buffer *buffers = katom->softjob_data; - unsigned int i; - unsigned int nr = katom->nr_extres; - - if (!buffers) - return; - - kbase_gpu_vm_lock(katom->kctx); - for (i = 0; i < nr; i++) { - int p; - struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; - - if (!buffers[i].pages) - break; - for (p = 0; p < buffers[i].nr_pages; p++) { - struct page *pg = buffers[i].pages[p]; - - if (pg) - put_page(pg); - } - if (buffers[i].is_vmalloc) - vfree(buffers[i].pages); - else - kfree(buffers[i].pages); - if (gpu_alloc) { - switch (gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - kbase_free_user_buffer(&buffers[i]); - break; - } - default: - /* Nothing to be done. */ - break; - } - kbase_mem_phy_alloc_put(gpu_alloc); - } - } - kbase_gpu_vm_unlock(katom->kctx); - kfree(buffers); - - katom->softjob_data = NULL; -} - -static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) -{ - struct kbase_debug_copy_buffer *buffers; - struct base_jd_debug_copy_buffer *user_buffers = NULL; - unsigned int i; - unsigned int nr = katom->nr_extres; - int ret = 0; - void __user *user_structs = (void __user *)(uintptr_t)katom->jc; - - if (!user_structs) - return -EINVAL; - - buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); - if (!buffers) { - ret = -ENOMEM; - goto out_cleanup; - } - katom->softjob_data = buffers; - - user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); - - if (!user_buffers) { - ret = -ENOMEM; - goto out_cleanup; - } - - ret = copy_from_user(user_buffers, user_structs, - sizeof(*user_buffers)*nr); - if (ret) { - ret = -EFAULT; - goto out_cleanup; - } - - for (i = 0; i < nr; i++) { - u64 addr = user_buffers[i].address; - u64 page_addr = addr & PAGE_MASK; - u64 end_page_addr = addr + user_buffers[i].size - 1; - u64 last_page_addr = end_page_addr & PAGE_MASK; - int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; - int pinned_pages; - struct kbase_va_region *reg; - struct base_external_resource user_extres; - - if (!addr) - continue; - - if (last_page_addr < page_addr) { - ret = -EINVAL; - goto out_cleanup; - } - - buffers[i].nr_pages = nr_pages; - buffers[i].offset = addr & ~PAGE_MASK; - if (buffers[i].offset >= PAGE_SIZE) { - ret = -EINVAL; - goto out_cleanup; - } - buffers[i].size = user_buffers[i].size; - - if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / - sizeof(struct page *))) { - buffers[i].is_vmalloc = true; - buffers[i].pages = vzalloc(nr_pages * - sizeof(struct page *)); - } else { - buffers[i].is_vmalloc = false; - buffers[i].pages = kcalloc(nr_pages, - sizeof(struct page *), GFP_KERNEL); - } - - if (!buffers[i].pages) { - ret = -ENOMEM; - goto out_cleanup; - } - - pinned_pages = get_user_pages_fast(page_addr, - nr_pages, - 1, /* Write */ - buffers[i].pages); - if (pinned_pages < 0) { - ret = pinned_pages; - goto out_cleanup; - } - 
if (pinned_pages != nr_pages) { - ret = -EINVAL; - goto out_cleanup; - } - - user_extres = user_buffers[i].extres; - if (user_extres.ext_resource == 0ULL) { - ret = -EINVAL; - goto out_cleanup; - } - - kbase_gpu_vm_lock(katom->kctx); - reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, user_extres.ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE); - - if (NULL == reg || NULL == reg->gpu_alloc || - (reg->flags & KBASE_REG_FREE)) { - ret = -EINVAL; - goto out_unlock; - } - - buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); - buffers[i].nr_extres_pages = reg->nr_pages; - - if (reg->nr_pages*PAGE_SIZE != buffers[i].size) - dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); - - switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; - unsigned long nr_pages = - alloc->imported.user_buf.nr_pages; - - if (alloc->imported.user_buf.mm != current->mm) { - ret = -EINVAL; - goto out_unlock; - } - buffers[i].extres_pages = kcalloc(nr_pages, - sizeof(struct page *), GFP_KERNEL); - if (!buffers[i].extres_pages) { - ret = -ENOMEM; - goto out_unlock; - } - - ret = get_user_pages_fast( - alloc->imported.user_buf.address, - nr_pages, 0, - buffers[i].extres_pages); - if (ret != nr_pages) - goto out_unlock; - ret = 0; - break; - } - default: - /* Nothing to be done. */ - break; - } - kbase_gpu_vm_unlock(katom->kctx); - } - kfree(user_buffers); - - return ret; - -out_unlock: - kbase_gpu_vm_unlock(katom->kctx); - -out_cleanup: - /* Frees allocated memory for kbase_debug_copy_job struct, including - * members, and sets jc to 0 */ - kbase_debug_copy_finish(katom); - kfree(user_buffers); - - return ret; -} - -void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, - void *extres_page, struct page **pages, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset, size_t *to_copy) -{ - void *target_page = kmap(pages[*target_page_nr]); - size_t chunk = PAGE_SIZE-offset; - - lockdep_assert_held(&kctx->reg_lock); - - if (!target_page) { - *target_page_nr += 1; - dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); - return; - } - - chunk = min(chunk, *to_copy); - - memcpy(target_page + offset, extres_page, chunk); - *to_copy -= chunk; - - kunmap(pages[*target_page_nr]); - - *target_page_nr += 1; - if (*target_page_nr >= nr_pages) - return; - - target_page = kmap(pages[*target_page_nr]); - if (!target_page) { - *target_page_nr += 1; - dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); - return; - } - - KBASE_DEBUG_ASSERT(target_page); - - chunk = min(offset, *to_copy); - memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); - *to_copy -= chunk; - - kunmap(pages[*target_page_nr]); -} - -int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) -{ - unsigned int i; - unsigned int target_page_nr = 0; - struct page **pages = buf_data->pages; - u64 offset = buf_data->offset; - size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; - size_t to_copy = min(extres_size, buf_data->size); - struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; - int ret = 0; -#ifdef CONFIG_DMA_SHARED_BUFFER - size_t dma_to_copy; -#endif - - KBASE_DEBUG_ASSERT(pages != NULL); - - kbase_gpu_vm_lock(kctx); - if (!gpu_alloc) { - ret = -EINVAL; - goto out_unlock; - } - - switch (gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - for (i = 0; i < buf_data->nr_extres_pages; i++) 
{ - struct page *pg = buf_data->extres_pages[i]; - void *extres_page = kmap(pg); - - if (extres_page) - kbase_mem_copy_from_extres_page(kctx, - extres_page, pages, - buf_data->nr_pages, - &target_page_nr, - offset, &to_copy); - - kunmap(pg); - if (target_page_nr >= buf_data->nr_pages) - break; - } - break; - } - break; -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; - - KBASE_DEBUG_ASSERT(dma_buf != NULL); - if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE) - dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch"); - - dma_to_copy = min(dma_buf->size, - (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); - ret = dma_buf_begin_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) - 0, dma_to_copy, -#endif - DMA_FROM_DEVICE); - if (ret) - goto out_unlock; - - for (i = 0; i < dma_to_copy/PAGE_SIZE; i++) { - - void *extres_page = dma_buf_kmap(dma_buf, i); - - if (extres_page) - kbase_mem_copy_from_extres_page(kctx, - extres_page, pages, - buf_data->nr_pages, - &target_page_nr, - offset, &to_copy); - - dma_buf_kunmap(dma_buf, i, extres_page); - if (target_page_nr >= buf_data->nr_pages) - break; - } - dma_buf_end_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) - 0, dma_to_copy, -#endif - DMA_FROM_DEVICE); - break; - } -#endif - default: - ret = -EINVAL; - } -out_unlock: - kbase_gpu_vm_unlock(kctx); - return ret; - -} - -static int kbase_debug_copy(struct kbase_jd_atom *katom) -{ - struct kbase_debug_copy_buffer *buffers = katom->softjob_data; - unsigned int i; - - if (WARN_ON(!buffers)) - return -EINVAL; - - for (i = 0; i < katom->nr_extres; i++) { - int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); - - if (res) - return res; - } - - return 0; -} - -#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) - -int kbasep_jit_alloc_validate(struct kbase_context *kctx, - struct base_jit_alloc_info *info) -{ - /* If the ID is zero, then fail the job */ - if (info->id == 0) - return -EINVAL; - - /* Sanity check that the PA fits within the VA */ - if (info->va_pages < info->commit_pages) - return -EINVAL; - - /* Ensure the GPU address is correctly aligned */ - if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) - return -EINVAL; - - if (kctx->jit_version == 1) { - /* Old JIT didn't have usage_id, max_allocations, bin_id - * or padding, so force them to zero - */ - info->usage_id = 0; - info->max_allocations = 0; - info->bin_id = 0; - info->flags = 0; - memset(info->padding, 0, sizeof(info->padding)); - } else { - int j; - - /* Check padding is all zeroed */ - for (j = 0; j < sizeof(info->padding); j++) { - if (info->padding[j] != 0) { - return -EINVAL; - } - } - - /* No bit other than TILER_ALIGN_TOP shall be set */ - if (info->flags & ~BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { - return -EINVAL; - } - } - - return 0; -} - -static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) -{ - __user void *data = (__user void *)(uintptr_t) katom->jc; - struct base_jit_alloc_info *info; - struct kbase_context *kctx = katom->kctx; - u32 count; - int ret; - u32 i; - - /* For backwards compatibility */ - if (katom->nr_extres == 0) - katom->nr_extres = 1; - count = katom->nr_extres; - - /* Sanity checks */ - if (!data || count > kctx->jit_max_allocations || - count > ARRAY_SIZE(kctx->jit_alloc)) { - ret = -EINVAL; - goto fail; - } - - /* Copy the information for safe access and 
future storage */ - info = kmalloc_array(count, sizeof(*info), GFP_KERNEL); - if (!info) { - ret = -ENOMEM; - goto fail; - } - if (copy_from_user(info, data, sizeof(*info)*count) != 0) { - ret = -EINVAL; - goto free_info; - } - katom->softjob_data = info; - - for (i = 0; i < count; i++, info++) { - ret = kbasep_jit_alloc_validate(kctx, info); - if (ret) - goto free_info; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(katom, - info->va_pages, info->commit_pages, info->extent, - info->id, info->bin_id, info->max_allocations, - info->flags, info->usage_id); - } - - katom->jit_blocked = false; - - lockdep_assert_held(&kctx->jctx.lock); - list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); - - /* - * Note: - * The provided info->gpu_alloc_addr isn't validated here as - * userland can cache allocations which means that even - * though the region is valid it doesn't represent the - * same thing it used to. - * - * Complete validation of va_pages, commit_pages and extent - * isn't done here as it will be done during the call to - * kbase_mem_alloc. - */ - return 0; - -free_info: - kfree(katom->softjob_data); - katom->softjob_data = NULL; -fail: - return ret; -} - -static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom) -{ - if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != - BASE_JD_REQ_SOFT_JIT_FREE)) - return NULL; - - return (u8 *) katom->softjob_data; -} - -static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct list_head *target_list_head = NULL; - struct kbase_jd_atom *entry; - - list_for_each_entry(entry, &kctx->jit_pending_alloc, queue) { - if (katom->age < entry->age) { - target_list_head = &entry->queue; - break; - } - } - - if (target_list_head == NULL) - target_list_head = &kctx->jit_pending_alloc; - - list_add_tail(&katom->queue, target_list_head); -} - -static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct base_jit_alloc_info *info; - struct kbase_va_region *reg; - struct kbase_vmap_struct mapping; - u64 *ptr, new_addr; - u32 count = katom->nr_extres; - u32 i; - - if (katom->jit_blocked) { - list_del(&katom->queue); - katom->jit_blocked = false; - } - - info = katom->softjob_data; - if (WARN_ON(!info)) { - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return 0; - } - - for (i = 0; i < count; i++, info++) { - /* The JIT ID is still in use so fail the allocation */ - if (kctx->jit_alloc[info->id]) { - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return 0; - } - } - - for (i = 0, info = katom->softjob_data; i < count; i++, info++) { - if (kctx->jit_alloc[info->id]) { - /* The JIT ID is duplicated in this atom. Roll back - * previous allocations and fail. 
- */ - u32 j; - - info = katom->softjob_data; - for (j = 0; j < i; j++, info++) { - kbase_jit_free(kctx, kctx->jit_alloc[info->id]); - kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; - } - - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return 0; - } - - /* Create a JIT allocation */ - reg = kbase_jit_allocate(kctx, info); - if (!reg) { - struct kbase_jd_atom *jit_atom; - bool can_block = false; - - lockdep_assert_held(&kctx->jctx.lock); - - jit_atom = list_first_entry(&kctx->jit_atoms_head, - struct kbase_jd_atom, jit_node); - - list_for_each_entry(jit_atom, &kctx->jit_atoms_head, jit_node) { - if (jit_atom == katom) - break; - - if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == - BASE_JD_REQ_SOFT_JIT_FREE) { - u8 *free_ids = kbase_jit_free_get_ids(jit_atom); - - if (free_ids && *free_ids && - kctx->jit_alloc[*free_ids]) { - /* A JIT free which is active and - * submitted before this atom - */ - can_block = true; - break; - } - } - } - - if (!can_block) { - /* Mark the failed allocation as well as the - * other un-attempted allocations in the set, - * so we know they are in use even if the - * allocation itself failed. - */ - for (; i < count; i++, info++) { - kctx->jit_alloc[info->id] = - (struct kbase_va_region *) -1; - } - - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return 0; - } - - /* There are pending frees for an active allocation - * so we should wait to see whether they free the - * memory. Add to the list of atoms for which JIT - * allocation is pending. - */ - kbase_jit_add_to_pending_alloc_list(katom); - katom->jit_blocked = true; - - /* Rollback, the whole set will be re-attempted */ - while (i-- > 0) { - info--; - kbase_jit_free(kctx, kctx->jit_alloc[info->id]); - kctx->jit_alloc[info->id] = NULL; - } - - return 1; - } - - /* Bind it to the user provided ID. */ - kctx->jit_alloc[info->id] = reg; - } - - for (i = 0, info = katom->softjob_data; i < count; i++, info++) { - /* - * Write the address of the JIT allocation to the user provided - * GPU allocation. - */ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); - if (!ptr) { - /* - * Leave the allocations "live" as the JIT free atom - * will be submitted anyway. 
- */ - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return 0; - } - - reg = kctx->jit_alloc[info->id]; - new_addr = reg->start_pfn << PAGE_SHIFT; - *ptr = new_addr; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(katom, - info->gpu_alloc_addr, - new_addr, info->va_pages); - kbase_vunmap(kctx, &mapping); - } - - katom->event_code = BASE_JD_EVENT_DONE; - - return 0; -} - -static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) -{ - struct base_jit_alloc_info *info; - - lockdep_assert_held(&katom->kctx->jctx.lock); - - if (WARN_ON(!katom->softjob_data)) - return; - - /* Remove atom from jit_atoms_head list */ - list_del(&katom->jit_node); - - if (katom->jit_blocked) { - list_del(&katom->queue); - katom->jit_blocked = false; - } - - info = katom->softjob_data; - /* Free the info structure */ - kfree(info); -} - -static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - __user void *data = (__user void *)(uintptr_t) katom->jc; - u8 *ids; - u32 count = MAX(katom->nr_extres, 1); - u32 i; - int ret; - - /* Sanity checks */ - if (count > ARRAY_SIZE(kctx->jit_alloc)) { - ret = -EINVAL; - goto fail; - } - - /* Copy the information for safe access and future storage */ - ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); - if (!ids) { - ret = -ENOMEM; - goto fail; - } - - lockdep_assert_held(&kctx->jctx.lock); - katom->softjob_data = ids; - - /* For backwards compatibility */ - if (katom->nr_extres) { - /* Fail the job if there is no list of ids */ - if (!data) { - ret = -EINVAL; - goto free_info; - } - - if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { - ret = -EINVAL; - goto free_info; - } - } else { - katom->nr_extres = 1; - *ids = (u8)katom->jc; - } - for (i = 0; i < count; i++) - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(katom, ids[i]); - - list_add_tail(&katom->jit_node, &kctx->jit_atoms_head); - - return 0; - -free_info: - kfree(katom->softjob_data); - katom->softjob_data = NULL; -fail: - return ret; -} - -static void kbase_jit_free_process(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - u8 *ids = kbase_jit_free_get_ids(katom); - u32 count = katom->nr_extres; - u32 i; - - if (ids == NULL) { - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return; - } - - for (i = 0; i < count; i++, ids++) { - /* - * If the ID is zero or it is not in use yet then fail the job. 
- */ - if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) { - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return; - } - } -} - -static void kbasep_jit_free_finish_worker(struct work_struct *work) -{ - struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, - work); - struct kbase_context *kctx = katom->kctx; - int resched; - - mutex_lock(&kctx->jctx.lock); - kbase_finish_soft_job(katom); - resched = jd_done_nolock(katom, NULL); - mutex_unlock(&kctx->jctx.lock); - - if (resched) - kbase_js_sched_all(kctx->kbdev); -} - -static void kbase_jit_free_finish(struct kbase_jd_atom *katom) -{ - struct list_head *i, *tmp; - struct kbase_context *kctx = katom->kctx; - LIST_HEAD(jit_pending_alloc_list); - u8 *ids; - size_t j; - - lockdep_assert_held(&kctx->jctx.lock); - - ids = kbase_jit_free_get_ids(katom); - if (WARN_ON(ids == NULL)) { - return; - } - - /* Remove this atom from the kctx->jit_atoms_head list */ - list_del(&katom->jit_node); - - for (j = 0; j != katom->nr_extres; ++j) { - if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) { - /* - * If the ID is valid but the allocation request failed - * still succeed this soft job but don't try and free - * the allocation. - */ - if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) - kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); - - kctx->jit_alloc[ids[j]] = NULL; - } - } - /* Free the list of ids */ - kfree(ids); - - list_splice_tail_init(&kctx->jit_pending_alloc, &jit_pending_alloc_list); - - list_for_each_safe(i, tmp, &jit_pending_alloc_list) { - struct kbase_jd_atom *pending_atom = list_entry(i, - struct kbase_jd_atom, queue); - if (kbase_jit_allocate_process(pending_atom) == 0) { - /* Atom has completed */ - INIT_WORK(&pending_atom->work, - kbasep_jit_free_finish_worker); - queue_work(kctx->jctx.job_done_wq, &pending_atom->work); - } - } -} - -static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) -{ - __user struct base_external_resource_list *user_ext_res; - struct base_external_resource_list *ext_res; - u64 count = 0; - size_t copy_size; - int ret; - - user_ext_res = (__user struct base_external_resource_list *) - (uintptr_t) katom->jc; - - /* Fail the job if there is no info structure */ - if (!user_ext_res) { - ret = -EINVAL; - goto fail; - } - - if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { - ret = -EINVAL; - goto fail; - } - - /* Is the number of external resources in range? */ - if (!count || count > BASE_EXT_RES_COUNT_MAX) { - ret = -EINVAL; - goto fail; - } - - /* Copy the information for safe access and future storage */ - copy_size = sizeof(*ext_res); - copy_size += sizeof(struct base_external_resource) * (count - 1); - ext_res = kzalloc(copy_size, GFP_KERNEL); - if (!ext_res) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { - ret = -EINVAL; - goto free_info; - } - - /* - * Overwrite the count with the first value incase it was changed - * after the fact. 
- */ - ext_res->count = count; - - katom->softjob_data = ext_res; - - return 0; - -free_info: - kfree(ext_res); -fail: - return ret; -} - -static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) -{ - struct base_external_resource_list *ext_res; - int i; - bool failed = false; - - ext_res = katom->softjob_data; - if (!ext_res) - goto failed_jc; - - kbase_gpu_vm_lock(katom->kctx); - - for (i = 0; i < ext_res->count; i++) { - u64 gpu_addr; - - gpu_addr = ext_res->ext_res[i].ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; - if (map) { - if (!kbase_sticky_resource_acquire(katom->kctx, - gpu_addr)) - goto failed_loop; - } else - if (!kbase_sticky_resource_release(katom->kctx, NULL, - gpu_addr)) - failed = true; - } - - /* - * In the case of unmap we continue unmapping other resources in the - * case of failure but will always report failure if _any_ unmap - * request fails. - */ - if (failed) - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - else - katom->event_code = BASE_JD_EVENT_DONE; - - kbase_gpu_vm_unlock(katom->kctx); - - return; - -failed_loop: - while (i > 0) { - u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; - - kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr); - - --i; - } - - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_vm_unlock(katom->kctx); - -failed_jc: - return; -} - -static void kbase_ext_res_finish(struct kbase_jd_atom *katom) -{ - struct base_external_resource_list *ext_res; - - ext_res = katom->softjob_data; - /* Free the info structure */ - kfree(ext_res); -} - -int kbase_process_soft_job(struct kbase_jd_atom *katom) -{ - int ret = 0; - - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom); - - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: - ret = kbase_dump_cpu_gpu_time(katom); - break; - -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - case BASE_JD_REQ_SOFT_FENCE_TRIGGER: - katom->event_code = kbase_sync_fence_out_trigger(katom, - katom->event_code == BASE_JD_EVENT_DONE ? 
- 0 : -EFAULT); - break; - case BASE_JD_REQ_SOFT_FENCE_WAIT: - { - ret = kbase_sync_fence_in_wait(katom); - - if (ret == 1) { -#ifdef CONFIG_MALI_FENCE_DEBUG - kbasep_add_waiting_with_timeout(katom); -#else - kbasep_add_waiting_soft_job(katom); -#endif - } - break; - } -#endif - - case BASE_JD_REQ_SOFT_REPLAY: - ret = kbase_replay_process(katom); - break; - case BASE_JD_REQ_SOFT_EVENT_WAIT: - ret = kbasep_soft_event_wait(katom); - break; - case BASE_JD_REQ_SOFT_EVENT_SET: - kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); - break; - case BASE_JD_REQ_SOFT_EVENT_RESET: - kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); - break; - case BASE_JD_REQ_SOFT_DEBUG_COPY: - { - int res = kbase_debug_copy(katom); - - if (res) - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - break; - } - case BASE_JD_REQ_SOFT_JIT_ALLOC: - ret = kbase_jit_allocate_process(katom); - break; - case BASE_JD_REQ_SOFT_JIT_FREE: - kbase_jit_free_process(katom); - break; - case BASE_JD_REQ_SOFT_EXT_RES_MAP: - kbase_ext_res_process(katom, true); - break; - case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: - kbase_ext_res_process(katom, false); - break; - } - - /* Atom is complete */ - KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom); - return ret; -} - -void kbase_cancel_soft_job(struct kbase_jd_atom *katom) -{ - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - case BASE_JD_REQ_SOFT_FENCE_WAIT: - kbase_sync_fence_in_cancel_wait(katom); - break; -#endif - case BASE_JD_REQ_SOFT_EVENT_WAIT: - kbasep_soft_event_cancel_job(katom); - break; - default: - /* This soft-job doesn't support cancellation! */ - KBASE_DEBUG_ASSERT(0); - } -} - -int kbase_prepare_soft_job(struct kbase_jd_atom *katom) -{ - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: - { - if (!IS_ALIGNED(katom->jc, cache_line_size())) - return -EINVAL; - } - break; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - case BASE_JD_REQ_SOFT_FENCE_TRIGGER: - { - struct base_fence fence; - int fd; - - if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) - return -EINVAL; - - fd = kbase_sync_fence_out_create(katom, - fence.basep.stream_fd); - if (fd < 0) - return -EINVAL; - - fence.basep.fd = fd; - if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { - kbase_sync_fence_out_remove(katom); - kbase_sync_fence_close_fd(fd); - fence.basep.fd = -EINVAL; - return -EINVAL; - } - } - break; - case BASE_JD_REQ_SOFT_FENCE_WAIT: - { - struct base_fence fence; - int ret; - - if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) - return -EINVAL; - - /* Get a reference to the fence object */ - ret = kbase_sync_fence_in_from_fd(katom, - fence.basep.fd); - if (ret < 0) - return ret; - -#ifdef CONFIG_MALI_DMA_FENCE - /* - * Set KCTX_NO_IMPLICIT_FENCE in the context the first - * time a soft fence wait job is observed. This will - * prevent the implicit dma-buf fence to conflict with - * the Android native sync fences. 
- */ - if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC)) - kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC); -#endif /* CONFIG_MALI_DMA_FENCE */ - } - break; -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ - case BASE_JD_REQ_SOFT_JIT_ALLOC: - return kbase_jit_allocate_prepare(katom); - case BASE_JD_REQ_SOFT_REPLAY: - break; - case BASE_JD_REQ_SOFT_JIT_FREE: - return kbase_jit_free_prepare(katom); - case BASE_JD_REQ_SOFT_EVENT_WAIT: - case BASE_JD_REQ_SOFT_EVENT_SET: - case BASE_JD_REQ_SOFT_EVENT_RESET: - if (katom->jc == 0) - return -EINVAL; - break; - case BASE_JD_REQ_SOFT_DEBUG_COPY: - return kbase_debug_copy_prepare(katom); - case BASE_JD_REQ_SOFT_EXT_RES_MAP: - return kbase_ext_res_prepare(katom); - case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: - return kbase_ext_res_prepare(katom); - default: - /* Unsupported soft-job */ - return -EINVAL; - } - return 0; -} - -void kbase_finish_soft_job(struct kbase_jd_atom *katom) -{ - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: - /* Nothing to do */ - break; -#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) - case BASE_JD_REQ_SOFT_FENCE_TRIGGER: - /* If fence has not yet been signaled, do it now */ - kbase_sync_fence_out_trigger(katom, katom->event_code == - BASE_JD_EVENT_DONE ? 0 : -EFAULT); - break; - case BASE_JD_REQ_SOFT_FENCE_WAIT: - /* Release katom's reference to fence object */ - kbase_sync_fence_in_remove(katom); - break; -#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ - case BASE_JD_REQ_SOFT_DEBUG_COPY: - kbase_debug_copy_finish(katom); - break; - case BASE_JD_REQ_SOFT_JIT_ALLOC: - kbase_jit_allocate_finish(katom); - break; - case BASE_JD_REQ_SOFT_EXT_RES_MAP: - kbase_ext_res_finish(katom); - break; - case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: - kbase_ext_res_finish(katom); - break; - case BASE_JD_REQ_SOFT_JIT_FREE: - kbase_jit_free_finish(katom); - break; - } -} - -void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) -{ - LIST_HEAD(local_suspended_soft_jobs); - struct kbase_jd_atom *tmp_iter; - struct kbase_jd_atom *katom_iter; - struct kbasep_js_device_data *js_devdata; - bool resched = false; - - KBASE_DEBUG_ASSERT(kbdev); - - js_devdata = &kbdev->js_data; - - /* Move out the entire list */ - mutex_lock(&js_devdata->runpool_mutex); - list_splice_init(&js_devdata->suspended_soft_jobs_list, - &local_suspended_soft_jobs); - mutex_unlock(&js_devdata->runpool_mutex); - - /* - * Each atom must be detached from the list and ran separately - - * it could be re-added to the old list, but this is unlikely - */ - list_for_each_entry_safe(katom_iter, tmp_iter, - &local_suspended_soft_jobs, dep_item[1]) { - struct kbase_context *kctx = katom_iter->kctx; - - mutex_lock(&kctx->jctx.lock); - - /* Remove from the global list */ - list_del(&katom_iter->dep_item[1]); - /* Remove from the context's list of waiting soft jobs */ - kbasep_remove_waiting_soft_job(katom_iter); - - if (kbase_process_soft_job(katom_iter) == 0) { - kbase_finish_soft_job(katom_iter); - resched |= jd_done_nolock(katom_iter, NULL); - } else { - KBASE_DEBUG_ASSERT((katom_iter->core_req & - BASE_JD_REQ_SOFT_JOB_TYPE) - != BASE_JD_REQ_SOFT_REPLAY); - } - - mutex_unlock(&kctx->jctx.lock); - } - - if (resched) - kbase_js_sched_all(kbdev); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_strings.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_strings.c deleted file mode 100755 index 22caa4a6d814..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_strings.c +++ /dev/null @@ -1,28 +0,0 @@ - /* - * - * (C) 
COPYRIGHT 2010-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ -#include "mali_kbase_strings.h" - -#define KBASE_DRV_NAME "mali" -#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" - -const char kbase_drv_name[] = KBASE_DRV_NAME; -const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_strings.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_strings.h deleted file mode 100755 index d2f1825314fe..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_strings.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -extern const char kbase_drv_name[]; -extern const char kbase_timeline_name[]; diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync.h deleted file mode 100755 index 70557dd5b33f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * @file mali_kbase_sync.h - * - * This file contains our internal "API" for explicit fences. - * It hides the implementation details of the actual explicit fence mechanism - * used (Android fences or sync file with DMA fences). 
- */ - -#ifndef MALI_KBASE_SYNC_H -#define MALI_KBASE_SYNC_H - -#include -#ifdef CONFIG_SYNC -#include -#endif -#ifdef CONFIG_SYNC_FILE -#include "mali_kbase_fence_defs.h" -#include -#endif - -#include "mali_kbase.h" - -/** - * struct kbase_sync_fence_info - Information about a fence - * @fence: Pointer to fence (type is void*, as underlaying struct can differ) - * @name: The name given to this fence when it was created - * @status: < 0 means error, 0 means active, 1 means signaled - * - * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() - * to get the information. - */ -struct kbase_sync_fence_info { - void *fence; - char name[32]; - int status; -}; - -/** - * kbase_sync_fence_stream_create() - Create a stream object - * @name: Name of stream (only used to ease debugging/visualization) - * @out_fd: A file descriptor representing the created stream object - * - * Can map down to a timeline implementation in some implementations. - * Exposed as a file descriptor. - * Life-time controlled via the file descriptor: - * - dup to add a ref - * - close to remove a ref - * - * return: 0 on success, < 0 on error - */ -int kbase_sync_fence_stream_create(const char *name, int *const out_fd); - -/** - * kbase_sync_fence_out_create Create an explicit output fence to specified atom - * @katom: Atom to assign the new explicit fence to - * @stream_fd: File descriptor for stream object to create fence on - * - * return: Valid file descriptor to fence or < 0 on error - */ -int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); - -/** - * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom - * @katom: Atom to assign the existing explicit fence to - * @fd: File descriptor to an existing fence - * - * Assigns an explicit input fence to atom. - * This can later be waited for by calling @kbase_sync_fence_in_wait - * - * return: 0 on success, < 0 on error - */ -int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); - -/** - * kbase_sync_fence_validate() - Validate a fd to be a valid fence - * @fd: File descriptor to check - * - * This function is only usable to catch unintentional user errors early, - * it does not stop malicious code changing the fd after this function returns. - * - * return 0: if fd is for a valid fence, < 0 if invalid - */ -int kbase_sync_fence_validate(int fd); - -/** - * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom - * @katom: Atom with an explicit fence to signal - * @result: < 0 means signal with error, 0 >= indicates success - * - * Signal output fence attached on katom and remove the fence from the atom. - * - * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE - */ -enum base_jd_event_code -kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); - -/** - * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled - * @katom: Atom with explicit fence to wait for - * - * If the fence is already signaled, then 0 is returned, and the caller must - * continue processing of the katom. - * - * If the fence isn't already signaled, then this kbase_sync framework will - * take responsibility to continue the processing once the fence is signaled. 
- * - * return: 0 if already signaled, otherwise 1 - */ -int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); - -/** - * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits - * @katom: Atom to cancel wait for - * - * This function is fully responsible for continuing processing of this atom - * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) - */ -void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); - -/** - * kbase_sync_fence_in_remove() - Remove the input fence from the katom - * @katom: Atom to remove explicit input fence for - * - * This will also release the corresponding reference. - */ -void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); - -/** - * kbase_sync_fence_out_remove() - Remove the output fence from the katom - * @katom: Atom to remove explicit output fence for - * - * This will also release the corresponding reference. - */ -void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); - -/** - * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence - * @fd: File descriptor to close - */ -static inline void kbase_sync_fence_close_fd(int fd) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) - ksys_close(fd); -#else - sys_close(fd); -#endif -} - -/** - * kbase_sync_fence_in_info_get() - Retrieves information about input fence - * @katom: Atom to get fence information from - * @info: Struct to be filled with fence information - * - * return: 0 on success, < 0 on error - */ -int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info); - -/** - * kbase_sync_fence_out_info_get() - Retrieves information about output fence - * @katom: Atom to get fence information from - * @info: Struct to be filled with fence information - * - * return: 0 on success, < 0 on error - */ -int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info); - -/** - * kbase_sync_status_string() - Get string matching @status - * @status: Value of fence status. - * - * return: Pointer to string describing @status. - */ -const char *kbase_sync_status_string(int status); - -/* - * Internal worker used to continue processing of atom. - */ -void kbase_sync_fence_wait_worker(struct work_struct *data); - -#ifdef CONFIG_MALI_FENCE_DEBUG -/** - * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state - * @katom: Atom to trigger fence debug dump for - */ -void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); -#endif - -#endif /* MALI_KBASE_SYNC_H */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_android.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_android.c deleted file mode 100755 index 75940fb08a05..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_android.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Code for supporting explicit Android fences (CONFIG_SYNC) - * Known to be good for kernels 4.5 and earlier. - * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels - * (see mali_kbase_sync_file.c) - */ - -#include -#include -#include -#include -#include -#include -#include -#include "sync.h" -#include -#include - -struct mali_sync_timeline { - struct sync_timeline timeline; - atomic_t counter; - atomic_t signaled; -}; - -struct mali_sync_pt { - struct sync_pt pt; - int order; - int result; -}; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -/* For backwards compatibility with kernels before 3.17. After 3.17 - * sync_pt_parent is included in the kernel. */ -static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) -{ - return pt->parent; -} -#endif - -static struct mali_sync_timeline *to_mali_sync_timeline( - struct sync_timeline *timeline) -{ - return container_of(timeline, struct mali_sync_timeline, timeline); -} - -static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) -{ - return container_of(pt, struct mali_sync_pt, pt); -} - -static struct sync_pt *timeline_dup(struct sync_pt *pt) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - struct mali_sync_pt *new_mpt; - struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), - sizeof(struct mali_sync_pt)); - - if (!new_pt) - return NULL; - - new_mpt = to_mali_sync_pt(new_pt); - new_mpt->order = mpt->order; - new_mpt->result = mpt->result; - - return new_pt; -} - -static int timeline_has_signaled(struct sync_pt *pt) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - struct mali_sync_timeline *mtl = to_mali_sync_timeline( - sync_pt_parent(pt)); - int result = mpt->result; - - int diff = atomic_read(&mtl->signaled) - mpt->order; - - if (diff >= 0) - return (result < 0) ? result : 1; - - return 0; -} - -static int timeline_compare(struct sync_pt *a, struct sync_pt *b) -{ - struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); - struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); - - int diff = ma->order - mb->order; - - if (diff == 0) - return 0; - - return (diff < 0) ? -1 : 1; -} - -static void timeline_value_str(struct sync_timeline *timeline, char *str, - int size) -{ - struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); - - snprintf(str, size, "%d", atomic_read(&mtl->signaled)); -} - -static void pt_value_str(struct sync_pt *pt, char *str, int size) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - - snprintf(str, size, "%d(%d)", mpt->order, mpt->result); -} - -static struct sync_timeline_ops mali_timeline_ops = { - .driver_name = "Mali", - .dup = timeline_dup, - .has_signaled = timeline_has_signaled, - .compare = timeline_compare, - .timeline_value_str = timeline_value_str, - .pt_value_str = pt_value_str, -}; - -/* Allocates a timeline for Mali - * - * One timeline should be allocated per API context. 
- */ -static struct sync_timeline *mali_sync_timeline_alloc(const char *name) -{ - struct sync_timeline *tl; - struct mali_sync_timeline *mtl; - - tl = sync_timeline_create(&mali_timeline_ops, - sizeof(struct mali_sync_timeline), name); - if (!tl) - return NULL; - - /* Set the counter in our private struct */ - mtl = to_mali_sync_timeline(tl); - atomic_set(&mtl->counter, 0); - atomic_set(&mtl->signaled, 0); - - return tl; -} - -static int kbase_stream_close(struct inode *inode, struct file *file) -{ - struct sync_timeline *tl; - - tl = (struct sync_timeline *)file->private_data; - sync_timeline_destroy(tl); - return 0; -} - -static const struct file_operations stream_fops = { - .owner = THIS_MODULE, - .release = kbase_stream_close, -}; - -int kbase_sync_fence_stream_create(const char *name, int *const out_fd) -{ - struct sync_timeline *tl; - - if (!out_fd) - return -EINVAL; - - tl = mali_sync_timeline_alloc(name); - if (!tl) - return -EINVAL; - - *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC); - - if (*out_fd < 0) { - sync_timeline_destroy(tl); - return -EINVAL; - } - - return 0; -} - -/* Allocates a sync point within the timeline. - * - * The timeline must be the one allocated by kbase_sync_timeline_alloc - * - * Sync points must be triggered in *exactly* the same order as they are - * allocated. - */ -static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) -{ - struct sync_pt *pt = sync_pt_create(parent, - sizeof(struct mali_sync_pt)); - struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); - struct mali_sync_pt *mpt; - - if (!pt) - return NULL; - - mpt = to_mali_sync_pt(pt); - mpt->order = atomic_inc_return(&mtl->counter); - mpt->result = 0; - - return pt; -} - -int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) -{ - struct sync_timeline *tl; - struct sync_pt *pt; - struct sync_fence *fence; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) - struct files_struct *files; - struct fdtable *fdt; -#endif - int fd; - struct file *tl_file; - - tl_file = fget(tl_fd); - if (tl_file == NULL) - return -EBADF; - - if (tl_file->f_op != &stream_fops) { - fd = -EBADF; - goto out; - } - - tl = tl_file->private_data; - - pt = kbase_sync_pt_alloc(tl); - if (!pt) { - fd = -EFAULT; - goto out; - } - - fence = sync_fence_create("mali_fence", pt); - if (!fence) { - sync_pt_free(pt); - fd = -EFAULT; - goto out; - } - - /* from here the fence owns the sync_pt */ - - /* create a fd representing the fence */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) - fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (fd < 0) { - sync_fence_put(fence); - goto out; - } -#else - fd = get_unused_fd(); - if (fd < 0) { - sync_fence_put(fence); - goto out; - } - - files = current->files; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) - __set_close_on_exec(fd, fdt); -#else - FD_SET(fd, fdt->close_on_exec); -#endif - spin_unlock(&files->file_lock); -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ - - /* bind fence to the new fd */ - sync_fence_install(fence, fd); - - katom->fence = sync_fence_fdget(fd); - if (katom->fence == NULL) { - /* The only way the fence can be NULL is if userspace closed it - * for us, so we don't need to clear it up */ - fd = -EINVAL; - goto out; - } - -out: - fput(tl_file); - - return fd; -} - -int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) -{ - katom->fence = sync_fence_fdget(fd); - return katom->fence ? 
0 : -ENOENT; -} - -int kbase_sync_fence_validate(int fd) -{ - struct sync_fence *fence; - - fence = sync_fence_fdget(fd); - if (!fence) - return -EINVAL; - - sync_fence_put(fence); - return 0; -} - -/* Returns true if the specified timeline is allocated by Mali */ -static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) -{ - return timeline->ops == &mali_timeline_ops; -} - -/* Signals a particular sync point - * - * Sync points must be triggered in *exactly* the same order as they are - * allocated. - * - * If they are signaled in the wrong order then a message will be printed in - * debug builds and otherwise attempts to signal order sync_pts will be ignored. - * - * result can be negative to indicate error, any other value is interpreted as - * success. - */ -static void kbase_sync_signal_pt(struct sync_pt *pt, int result) -{ - struct mali_sync_pt *mpt = to_mali_sync_pt(pt); - struct mali_sync_timeline *mtl = to_mali_sync_timeline( - sync_pt_parent(pt)); - int signaled; - int diff; - - mpt->result = result; - - do { - signaled = atomic_read(&mtl->signaled); - - diff = signaled - mpt->order; - - if (diff > 0) { - /* The timeline is already at or ahead of this point. - * This should not happen unless userspace has been - * signaling fences out of order, so warn but don't - * violate the sync_pt API. - * The warning is only in debug builds to prevent - * a malicious user being able to spam dmesg. - */ -#ifdef CONFIG_MALI_DEBUG - pr_err("Fences were triggered in a different order to allocation!"); -#endif /* CONFIG_MALI_DEBUG */ - return; - } - } while (atomic_cmpxchg(&mtl->signaled, - signaled, mpt->order) != signaled); -} - -enum base_jd_event_code -kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) -{ - struct sync_pt *pt; - struct sync_timeline *timeline; - - if (!katom->fence) - return BASE_JD_EVENT_JOB_CANCELLED; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - if (!list_is_singular(&katom->fence->pt_list_head)) { -#else - if (katom->fence->num_fences != 1) { -#endif - /* Not exactly one item in the list - so it didn't (directly) - * come from us */ - return BASE_JD_EVENT_JOB_CANCELLED; - } - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - pt = list_first_entry(&katom->fence->pt_list_head, - struct sync_pt, pt_list); -#else - pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); -#endif - timeline = sync_pt_parent(pt); - - if (!kbase_sync_timeline_is_ours(timeline)) { - /* Fence has a sync_pt which isn't ours! */ - return BASE_JD_EVENT_JOB_CANCELLED; - } - - kbase_sync_signal_pt(pt, result); - - sync_timeline_signal(timeline); - - kbase_sync_fence_out_remove(katom); - - return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; -} - -static inline int kbase_fence_get_status(struct sync_fence *fence) -{ - if (!fence) - return -ENOENT; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - return fence->status; -#else - return atomic_read(&fence->status); -#endif -} - -static void kbase_fence_wait_callback(struct sync_fence *fence, - struct sync_fence_waiter *waiter) -{ - struct kbase_jd_atom *katom = container_of(waiter, - struct kbase_jd_atom, sync_waiter); - struct kbase_context *kctx = katom->kctx; - - /* Propagate the fence status to the atom. - * If negative then cancel this atom and its dependencies. 
- */ - if (kbase_fence_get_status(fence) < 0) - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - /* To prevent a potential deadlock we schedule the work onto the - * job_done_wq workqueue - * - * The issue is that we may signal the timeline while holding - * kctx->jctx.lock and the callbacks are run synchronously from - * sync_timeline_signal. So we simply defer the work. - */ - - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} - -int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) -{ - int ret; - - sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); - - ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); - - if (ret == 1) { - /* Already signaled */ - return 0; - } - - if (ret < 0) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); - } - - return 1; -} - -void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) -{ - if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { - /* The wait wasn't cancelled - leave the cleanup for - * kbase_fence_wait_callback */ - return; - } - - /* Wait was cancelled - zap the atoms */ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - kbasep_remove_waiting_soft_job(katom); - kbase_finish_soft_job(katom); - - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(katom->kctx->kbdev); -} - -void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) -{ - if (katom->fence) { - sync_fence_put(katom->fence); - katom->fence = NULL; - } -} - -void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) -{ - if (katom->fence) { - sync_fence_put(katom->fence); - katom->fence = NULL; - } -} - -int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) -{ - if (!katom->fence) - return -ENOENT; - - info->fence = katom->fence; - info->status = kbase_fence_get_status(katom->fence); - strlcpy(info->name, katom->fence->name, sizeof(info->name)); - - return 0; -} - -int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) -{ - if (!katom->fence) - return -ENOENT; - - info->fence = katom->fence; - info->status = kbase_fence_get_status(katom->fence); - strlcpy(info->name, katom->fence->name, sizeof(info->name)); - - return 0; -} - -#ifdef CONFIG_MALI_FENCE_DEBUG -void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) -{ - /* Dump out the full state of all the Android sync fences. - * The function sync_dump() isn't exported to modules, so force - * sync_fence_wait() to time out to trigger sync_dump(). - */ - if (katom->fence) - sync_fence_wait(katom->fence, 1); -} -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_common.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_common.c deleted file mode 100755 index 5239daee409e..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_common.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * @file mali_kbase_sync_common.c - * - * Common code for our explicit fence functionality - */ - -#include -#include "mali_kbase.h" -#include "mali_kbase_sync.h" - -void kbase_sync_fence_wait_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom; - - katom = container_of(data, struct kbase_jd_atom, work); - kbase_soft_event_wait_callback(katom); -} - -const char *kbase_sync_status_string(int status) -{ - if (status == 0) - return "signaled"; - else if (status > 0) - return "active"; - else - return "error"; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_file.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_file.c deleted file mode 100755 index 231239941352..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_sync_file.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) - * Introduced in kernel 4.9. - * Android explicit fences (CONFIG_SYNC) can be used for older kernels - * (see mali_kbase_sync_android.c) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mali_kbase_fence_defs.h" -#include "mali_kbase_sync.h" -#include "mali_kbase_fence.h" -#include "mali_kbase.h" - -static const struct file_operations stream_fops = { - .owner = THIS_MODULE -}; - -int kbase_sync_fence_stream_create(const char *name, int *const out_fd) -{ - if (!out_fd) - return -EINVAL; - - *out_fd = anon_inode_getfd(name, &stream_fops, NULL, - O_RDONLY | O_CLOEXEC); - if (*out_fd < 0) - return -EINVAL; - - return 0; -} - -int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - struct sync_file *sync_file; - int fd; - - fence = kbase_fence_out_new(katom); - if (!fence) - return -ENOMEM; - -#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) - /* Take an extra reference to the fence on behalf of the sync_file. - * This is only needed on older kernels where sync_file_create() - * does not take its own reference. This was changed in v4.9.68, - * where sync_file_create() now takes its own reference. 
- */ - dma_fence_get(fence); -#endif - - /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence); - if (!sync_file) { - dma_fence_put(fence); - kbase_fence_out_remove(katom); - return -ENOMEM; - } - - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - fput(sync_file->file); - kbase_fence_out_remove(katom); - return fd; - } - - fd_install(fd, sync_file->file); - - return fd; -} - -int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence = sync_file_get_fence(fd); -#else - struct dma_fence *fence = sync_file_get_fence(fd); -#endif - - if (!fence) - return -ENOENT; - - kbase_fence_fence_in_set(katom, fence); - - return 0; -} - -int kbase_sync_fence_validate(int fd) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence = sync_file_get_fence(fd); -#else - struct dma_fence *fence = sync_file_get_fence(fd); -#endif - - if (!fence) - return -EINVAL; - - dma_fence_put(fence); - - return 0; /* valid */ -} - -enum base_jd_event_code -kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) -{ - int res; - - if (!kbase_fence_out_is_ours(katom)) { - /* Not our fence */ - return BASE_JD_EVENT_JOB_CANCELLED; - } - - res = kbase_fence_out_signal(katom, result); - if (unlikely(res < 0)) { - dev_warn(katom->kctx->kbdev->dev, - "fence_signal() failed with %d\n", res); - } - - kbase_sync_fence_out_remove(katom); - - return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -static void kbase_fence_wait_callback(struct fence *fence, - struct fence_cb *cb) -#else -static void kbase_fence_wait_callback(struct dma_fence *fence, - struct dma_fence_cb *cb) -#endif -{ - struct kbase_fence_cb *kcb = container_of(cb, - struct kbase_fence_cb, - fence_cb); - struct kbase_jd_atom *katom = kcb->katom; - struct kbase_context *kctx = katom->kctx; - - /* Cancel atom if fence is erroneous */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68)) - if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) -#else - if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) -#endif - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - if (kbase_fence_dep_count_dec_and_test(katom)) { - /* We take responsibility of handling this */ - kbase_fence_dep_count_set(katom, -1); - - /* To prevent a potential deadlock we schedule the work onto the - * job_done_wq workqueue - * - * The issue is that we may signal the timeline while holding - * kctx->jctx.lock and the callbacks are run synchronously from - * sync_timeline_signal. So we simply defer the work. - */ - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); - } -} - -int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) -{ - int err; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - - fence = kbase_fence_in_get(katom); - if (!fence) - return 0; /* no input fence to wait for, good to go! 
*/ - - kbase_fence_dep_count_set(katom, 1); - - err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback); - - kbase_fence_put(fence); - - if (likely(!err)) { - /* Test if the callbacks are already triggered */ - if (kbase_fence_dep_count_dec_and_test(katom)) { - kbase_fence_free_callbacks(katom); - kbase_fence_dep_count_set(katom, -1); - return 0; /* Already signaled, good to go right now */ - } - - /* Callback installed, so we just need to wait for it... */ - } else { - /* Failure */ - kbase_fence_free_callbacks(katom); - kbase_fence_dep_count_set(katom, -1); - - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - - INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); - } - - return 1; /* completion to be done later by callback/worker */ -} - -void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) -{ - if (!kbase_fence_free_callbacks(katom)) { - /* The wait wasn't cancelled - - * leave the cleanup for kbase_fence_wait_callback */ - return; - } - - /* Take responsibility of completion */ - kbase_fence_dep_count_set(katom, -1); - - /* Wait was cancelled - zap the atoms */ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - kbasep_remove_waiting_soft_job(katom); - kbase_finish_soft_job(katom); - - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(katom->kctx->kbdev); -} - -void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) -{ - kbase_fence_out_remove(katom); -} - -void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) -{ - kbase_fence_free_callbacks(katom); - kbase_fence_in_remove(katom); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -static void kbase_sync_fence_info_get(struct fence *fence, - struct kbase_sync_fence_info *info) -#else -static void kbase_sync_fence_info_get(struct dma_fence *fence, - struct kbase_sync_fence_info *info) -#endif -{ - info->fence = fence; - - /* translate into CONFIG_SYNC status: - * < 0 : error - * 0 : active - * 1 : signaled - */ - if (dma_fence_is_signaled(fence)) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 68)) - int status = fence->error; -#else - int status = fence->status; -#endif - if (status < 0) - info->status = status; /* signaled with error */ - else - info->status = 1; /* signaled with success */ - } else { - info->status = 0; /* still active (unsignaled) */ - } - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - scnprintf(info->name, sizeof(info->name), "%u#%u", - fence->context, fence->seqno); -#else - scnprintf(info->name, sizeof(info->name), "%llu#%u", - fence->context, fence->seqno); -#endif -} - -int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - - fence = kbase_fence_in_get(katom); - if (!fence) - return -ENOENT; - - kbase_sync_fence_info_get(fence, info); - - kbase_fence_put(fence); - - return 0; -} - -int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, - struct kbase_sync_fence_info *info) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) - struct fence *fence; -#else - struct dma_fence *fence; -#endif - - fence = kbase_fence_out_get(katom); - if (!fence) - return -ENOENT; - - kbase_sync_fence_info_get(fence, info); - - kbase_fence_put(fence); - - return 0; -} - - -#ifdef 
CONFIG_MALI_FENCE_DEBUG -void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) -{ - /* Not implemented */ -} -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_tlstream.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_tlstream.c deleted file mode 100755 index 10e38897514b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_tlstream.c +++ /dev/null @@ -1,2674 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/*****************************************************************************/ - -/* The version of swtrace protocol used in timeline stream. */ -#define SWTRACE_VERSION 3 - -/* The maximum expected length of string in tracepoint descriptor. */ -#define STRLEN_MAX 64 /* bytes */ - -/* The number of nanoseconds in a second. */ -#define NSECS_IN_SEC 1000000000ull /* ns */ - -/* The period of autoflush checker execution in milliseconds. */ -#define AUTOFLUSH_INTERVAL 1000 /* ms */ - -/* The maximum size of a single packet used by timeline. */ -#define PACKET_SIZE 4096 /* bytes */ - -/* The number of packets used by one timeline stream. */ -#define PACKET_COUNT 16 - -/* The number of bytes reserved for packet header. - * These value must be defined according to MIPE documentation. */ -#define PACKET_HEADER_SIZE 8 /* bytes */ - -/* The number of bytes reserved for packet sequence number. - * These value must be defined according to MIPE documentation. */ -#define PACKET_NUMBER_SIZE 4 /* bytes */ - -/* Packet header - first word. - * These values must be defined according to MIPE documentation. */ -#define PACKET_STREAMID_POS 0 -#define PACKET_STREAMID_LEN 8 -#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) -#define PACKET_RSVD1_LEN 8 -#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) -#define PACKET_TYPE_LEN 3 -#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) -#define PACKET_CLASS_LEN 7 -#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) -#define PACKET_FAMILY_LEN 6 - -/* Packet header - second word - * These values must be defined according to MIPE documentation. */ -#define PACKET_LENGTH_POS 0 -#define PACKET_LENGTH_LEN 24 -#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) -#define PACKET_SEQBIT_LEN 1 -#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) -#define PACKET_RSVD2_LEN 7 - -/* Types of streams generated by timeline. - * Order is significant! Header streams must precede respective body streams. 
*/ -enum tl_stream_type { - TL_STREAM_TYPE_OBJ_HEADER, - TL_STREAM_TYPE_OBJ_SUMMARY, - TL_STREAM_TYPE_OBJ, - TL_STREAM_TYPE_AUX_HEADER, - TL_STREAM_TYPE_AUX, - - TL_STREAM_TYPE_COUNT -}; - -/* Timeline packet family ids. - * Values are significant! Check MIPE documentation. */ -enum tl_packet_family { - TL_PACKET_FAMILY_CTRL = 0, /* control packets */ - TL_PACKET_FAMILY_TL = 1, /* timeline packets */ - - TL_PACKET_FAMILY_COUNT -}; - -/* Packet classes used in timeline streams. - * Values are significant! Check MIPE documentation. */ -enum tl_packet_class { - TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ - TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ -}; - -/* Packet types used in timeline streams. - * Values are significant! Check MIPE documentation. */ -enum tl_packet_type { - TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ - TL_PACKET_TYPE_BODY = 1, /* stream's body */ - TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ -}; - -/* Message ids of trace events that are recorded in the timeline stream. */ -enum tl_msg_id_obj { - /* Timeline object events. */ - KBASE_TL_NEW_CTX, - KBASE_TL_NEW_GPU, - KBASE_TL_NEW_LPU, - KBASE_TL_NEW_ATOM, - KBASE_TL_NEW_AS, - KBASE_TL_DEL_CTX, - KBASE_TL_DEL_ATOM, - KBASE_TL_LIFELINK_LPU_GPU, - KBASE_TL_LIFELINK_AS_GPU, - KBASE_TL_RET_CTX_LPU, - KBASE_TL_RET_ATOM_CTX, - KBASE_TL_RET_ATOM_LPU, - KBASE_TL_NRET_CTX_LPU, - KBASE_TL_NRET_ATOM_CTX, - KBASE_TL_NRET_ATOM_LPU, - KBASE_TL_RET_AS_CTX, - KBASE_TL_NRET_AS_CTX, - KBASE_TL_RET_ATOM_AS, - KBASE_TL_NRET_ATOM_AS, - KBASE_TL_ATTRIB_ATOM_CONFIG, - KBASE_TL_ATTRIB_ATOM_PRIORITY, - KBASE_TL_ATTRIB_ATOM_STATE, - KBASE_TL_ATTRIB_ATOM_PRIORITIZED, - KBASE_TL_ATTRIB_ATOM_JIT, - KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, - KBASE_TL_ATTRIB_ATOM_JITFREEINFO, - KBASE_TL_ATTRIB_AS_CONFIG, - KBASE_TL_EVENT_LPU_SOFTSTOP, - KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, - KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, - KBASE_TL_EVENT_ATOM_SOFTJOB_START, - KBASE_TL_EVENT_ATOM_SOFTJOB_END, - - /* Job dump specific events. */ - KBASE_JD_GPU_SOFT_RESET -}; - -/* Message ids of trace events that are recorded in the auxiliary stream. */ -enum tl_msg_id_aux { - KBASE_AUX_PM_STATE, - KBASE_AUX_PAGEFAULT, - KBASE_AUX_PAGESALLOC, - KBASE_AUX_DEVFREQ_TARGET, - KBASE_AUX_PROTECTED_ENTER_START, - KBASE_AUX_PROTECTED_ENTER_END, - KBASE_AUX_PROTECTED_LEAVE_START, - KBASE_AUX_PROTECTED_LEAVE_END, - KBASE_AUX_JIT_STATS, -}; - -/*****************************************************************************/ - -/** - * struct tl_stream - timeline stream structure - * @lock: message order lock - * @buffer: array of buffers - * @wbi: write buffer index - * @rbi: read buffer index - * @numbered: if non-zero stream's packets are sequentially numbered - * @autoflush_counter: counter tracking stream's autoflush state - * - * This structure holds information needed to construct proper packets in the - * timeline stream. Each message in sequence must bear timestamp that is greater - * to one in previous message in the same stream. For this reason lock is held - * throughout the process of message creation. Each stream contains set of - * buffers. Each buffer will hold one MIPE packet. In case there is no free - * space required to store incoming message the oldest buffer is discarded. - * Each packet in timeline body stream has sequence number embedded (this value - * must increment monotonically and is used by packets receiver to discover - * buffer overflows. 
- * Autoflush counter is set to negative number when there is no data pending - * for flush and it is set to zero on every update of the buffer. Autoflush - * timer will increment the counter by one on every expiry. In case there will - * be no activity on the buffer during two consecutive timer expiries, stream - * buffer will be flushed. - */ -struct tl_stream { - spinlock_t lock; - - struct { - atomic_t size; /* number of bytes in buffer */ - char data[PACKET_SIZE]; /* buffer's data */ - } buffer[PACKET_COUNT]; - - atomic_t wbi; - atomic_t rbi; - - int numbered; - atomic_t autoflush_counter; -}; - -/** - * struct tp_desc - tracepoint message descriptor structure - * @id: tracepoint ID identifying message in stream - * @id_str: human readable version of tracepoint ID - * @name: tracepoint description - * @arg_types: tracepoint's arguments types declaration - * @arg_names: comma separated list of tracepoint's arguments names - */ -struct tp_desc { - u32 id; - const char *id_str; - const char *name; - const char *arg_types; - const char *arg_names; -}; - -/*****************************************************************************/ - -/* Configuration of timeline streams generated by kernel. - * Kernel emit only streams containing either timeline object events or - * auxiliary events. All streams have stream id value of 1 (as opposed to user - * space streams that have value of 0). */ -static const struct { - enum tl_packet_family pkt_family; - enum tl_packet_class pkt_class; - enum tl_packet_type pkt_type; - unsigned int stream_id; -} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER, 1}, - {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} -}; - -/* The timeline streams generated by kernel. */ -static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT]; - -/* Autoflush timer. */ -static struct timer_list autoflush_timer; - -/* If non-zero autoflush timer is active. */ -static atomic_t autoflush_timer_active; - -/* Reader lock. Only one reader is allowed to have access to the timeline - * streams at any given time. */ -static DEFINE_MUTEX(tl_reader_lock); - -/* Timeline stream event queue. */ -static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); - -/* The timeline stream file operations functions. */ -static ssize_t kbasep_tlstream_read( - struct file *filp, - char __user *buffer, - size_t size, - loff_t *f_pos); -static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait); -static int kbasep_tlstream_release(struct inode *inode, struct file *filp); - -/* The timeline stream file operations structure. */ -static const struct file_operations kbasep_tlstream_fops = { - .release = kbasep_tlstream_release, - .read = kbasep_tlstream_read, - .poll = kbasep_tlstream_poll, -}; - -/* Descriptors of timeline messages transmitted in object events stream. 
*/ -static const struct tp_desc tp_desc_obj[] = { - { - KBASE_TL_NEW_CTX, - __stringify(KBASE_TL_NEW_CTX), - "object ctx is created", - "@pII", - "ctx,ctx_nr,tgid" - }, - { - KBASE_TL_NEW_GPU, - __stringify(KBASE_TL_NEW_GPU), - "object gpu is created", - "@pII", - "gpu,gpu_id,core_count" - }, - { - KBASE_TL_NEW_LPU, - __stringify(KBASE_TL_NEW_LPU), - "object lpu is created", - "@pII", - "lpu,lpu_nr,lpu_fn" - }, - { - KBASE_TL_NEW_ATOM, - __stringify(KBASE_TL_NEW_ATOM), - "object atom is created", - "@pI", - "atom,atom_nr" - }, - { - KBASE_TL_NEW_AS, - __stringify(KBASE_TL_NEW_AS), - "address space object is created", - "@pI", - "address_space,as_nr" - }, - { - KBASE_TL_DEL_CTX, - __stringify(KBASE_TL_DEL_CTX), - "context is destroyed", - "@p", - "ctx" - }, - { - KBASE_TL_DEL_ATOM, - __stringify(KBASE_TL_DEL_ATOM), - "atom is destroyed", - "@p", - "atom" - }, - { - KBASE_TL_LIFELINK_LPU_GPU, - __stringify(KBASE_TL_LIFELINK_LPU_GPU), - "lpu is deleted with gpu", - "@pp", - "lpu,gpu" - }, - { - KBASE_TL_LIFELINK_AS_GPU, - __stringify(KBASE_TL_LIFELINK_AS_GPU), - "address space is deleted with gpu", - "@pp", - "address_space,gpu" - }, - { - KBASE_TL_RET_CTX_LPU, - __stringify(KBASE_TL_RET_CTX_LPU), - "context is retained by lpu", - "@pp", - "ctx,lpu" - }, - { - KBASE_TL_RET_ATOM_CTX, - __stringify(KBASE_TL_RET_ATOM_CTX), - "atom is retained by context", - "@pp", - "atom,ctx" - }, - { - KBASE_TL_RET_ATOM_LPU, - __stringify(KBASE_TL_RET_ATOM_LPU), - "atom is retained by lpu", - "@pps", - "atom,lpu,attrib_match_list" - }, - { - KBASE_TL_NRET_CTX_LPU, - __stringify(KBASE_TL_NRET_CTX_LPU), - "context is released by lpu", - "@pp", - "ctx,lpu" - }, - { - KBASE_TL_NRET_ATOM_CTX, - __stringify(KBASE_TL_NRET_ATOM_CTX), - "atom is released by context", - "@pp", - "atom,ctx" - }, - { - KBASE_TL_NRET_ATOM_LPU, - __stringify(KBASE_TL_NRET_ATOM_LPU), - "atom is released by lpu", - "@pp", - "atom,lpu" - }, - { - KBASE_TL_RET_AS_CTX, - __stringify(KBASE_TL_RET_AS_CTX), - "address space is retained by context", - "@pp", - "address_space,ctx" - }, - { - KBASE_TL_NRET_AS_CTX, - __stringify(KBASE_TL_NRET_AS_CTX), - "address space is released by context", - "@pp", - "address_space,ctx" - }, - { - KBASE_TL_RET_ATOM_AS, - __stringify(KBASE_TL_RET_ATOM_AS), - "atom is retained by address space", - "@pp", - "atom,address_space" - }, - { - KBASE_TL_NRET_ATOM_AS, - __stringify(KBASE_TL_NRET_ATOM_AS), - "atom is released by address space", - "@pp", - "atom,address_space" - }, - { - KBASE_TL_ATTRIB_ATOM_CONFIG, - __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), - "atom job slot attributes", - "@pLLI", - "atom,descriptor,affinity,config" - }, - { - KBASE_TL_ATTRIB_ATOM_PRIORITY, - __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY), - "atom priority", - "@pI", - "atom,prio" - }, - { - KBASE_TL_ATTRIB_ATOM_STATE, - __stringify(KBASE_TL_ATTRIB_ATOM_STATE), - "atom state", - "@pI", - "atom,state" - }, - { - KBASE_TL_ATTRIB_ATOM_PRIORITIZED, - __stringify(KBASE_TL_ATTRIB_ATOM_PRIORITIZED), - "atom caused priority change", - "@p", - "atom" - }, - { - KBASE_TL_ATTRIB_ATOM_JIT, - __stringify(KBASE_TL_ATTRIB_ATOM_JIT), - "jit done for atom", - "@pLLL", - "atom,edit_addr,new_addr,va_pages" - }, - { - KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, - __stringify(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO), - "Information about JIT allocations", - "@pLLLIIIII", - "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,flags,usg_id" - }, - { - KBASE_TL_ATTRIB_ATOM_JITFREEINFO, - __stringify(KBASE_TL_ATTRIB_ATOM_JITFREEINFO), - "Information about JIT frees", - "@pI", - 
"atom,j_id" - }, - { - KBASE_TL_ATTRIB_AS_CONFIG, - __stringify(KBASE_TL_ATTRIB_AS_CONFIG), - "address space attributes", - "@pLLL", - "address_space,transtab,memattr,transcfg" - }, - { - KBASE_TL_EVENT_LPU_SOFTSTOP, - __stringify(KBASE_TL_EVENT_LPU_SOFTSTOP), - "softstop event on given lpu", - "@p", - "lpu" - }, - { - KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, - __stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX), - "atom softstopped", - "@p", - "atom" - }, - { - KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, - __stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE), - "atom softstop issued", - "@p", - "atom" - }, - { - KBASE_TL_EVENT_ATOM_SOFTJOB_START, - __stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START), - "atom soft job has started", - "@p", - "atom" - }, - { - KBASE_TL_EVENT_ATOM_SOFTJOB_END, - __stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END), - "atom soft job has completed", - "@p", - "atom" - }, - { - KBASE_JD_GPU_SOFT_RESET, - __stringify(KBASE_JD_GPU_SOFT_RESET), - "gpu soft reset", - "@p", - "gpu" - }, -}; - -/* Descriptors of timeline messages transmitted in auxiliary events stream. */ -static const struct tp_desc tp_desc_aux[] = { - { - KBASE_AUX_PM_STATE, - __stringify(KBASE_AUX_PM_STATE), - "PM state", - "@IL", - "core_type,core_state_bitset" - }, - { - KBASE_AUX_PAGEFAULT, - __stringify(KBASE_AUX_PAGEFAULT), - "Page fault", - "@IL", - "ctx_nr,page_cnt_change" - }, - { - KBASE_AUX_PAGESALLOC, - __stringify(KBASE_AUX_PAGESALLOC), - "Total alloc pages change", - "@IL", - "ctx_nr,page_cnt" - }, - { - KBASE_AUX_DEVFREQ_TARGET, - __stringify(KBASE_AUX_DEVFREQ_TARGET), - "New device frequency target", - "@L", - "target_freq" - }, - { - KBASE_AUX_PROTECTED_ENTER_START, - __stringify(KBASE_AUX_PROTECTED_ENTER_START), - "enter protected mode start", - "@p", - "gpu" - }, - { - KBASE_AUX_PROTECTED_ENTER_END, - __stringify(KBASE_AUX_PROTECTED_ENTER_END), - "enter protected mode end", - "@p", - "gpu" - }, - { - KBASE_AUX_PROTECTED_LEAVE_START, - __stringify(KBASE_AUX_PROTECTED_LEAVE_START), - "leave protected mode start", - "@p", - "gpu" - }, - { - KBASE_AUX_PROTECTED_LEAVE_END, - __stringify(KBASE_AUX_PROTECTED_LEAVE_END), - "leave protected mode end", - "@p", - "gpu" - }, - { - KBASE_AUX_JIT_STATS, - __stringify(KBASE_AUX_JIT_STATS), - "per-bin JIT statistics", - "@IIIIII", - "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages" - } -}; - -#if MALI_UNIT_TEST -/* Number of bytes read by user. */ -static atomic_t tlstream_bytes_collected = {0}; - -/* Number of bytes generated by tracepoint messages. */ -static atomic_t tlstream_bytes_generated = {0}; -#endif /* MALI_UNIT_TEST */ - -/*****************************************************************************/ - -/* Indicator of whether the timeline stream file descriptor is used. */ -atomic_t kbase_tlstream_enabled = {0}; - -/*****************************************************************************/ - -/** - * kbasep_tlstream_get_timestamp - return timestamp - * - * Function returns timestamp value based on raw monotonic timer. Value will - * wrap around zero in case of overflow. 
- * Return: timestamp value
- */
-static u64 kbasep_tlstream_get_timestamp(void)
-{
- struct timespec ts;
- u64 timestamp;
-
- getrawmonotonic(&ts);
- timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
- return timestamp;
-}
-
-/**
- * kbasep_tlstream_write_bytes - write data to message buffer
- * @buffer: buffer where data will be written
- * @pos: position in the buffer where to place data
- * @bytes: pointer to buffer holding data
- * @len: length of data to be written
- *
- * Return: updated position in the buffer
- */
-static size_t kbasep_tlstream_write_bytes(
- char *buffer,
- size_t pos,
- const void *bytes,
- size_t len)
-{
- KBASE_DEBUG_ASSERT(buffer);
- KBASE_DEBUG_ASSERT(bytes);
-
- memcpy(&buffer[pos], bytes, len);
-
- return pos + len;
-}
-
-/**
- * kbasep_tlstream_write_string - write string to message buffer
- * @buffer: buffer where data will be written
- * @pos: position in the buffer where to place data
- * @string: pointer to buffer holding the source string
- * @max_write_size: number of bytes that can be stored in buffer
- *
- * Return: updated position in the buffer
- */
-static size_t kbasep_tlstream_write_string(
- char *buffer,
- size_t pos,
- const char *string,
- size_t max_write_size)
-{
- u32 string_len;
-
- KBASE_DEBUG_ASSERT(buffer);
- KBASE_DEBUG_ASSERT(string);
- /* Timeline string consists of at least string length and nul
- * terminator. */
- KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
- max_write_size -= sizeof(string_len);
-
- string_len = strlcpy(
- &buffer[pos + sizeof(string_len)],
- string,
- max_write_size);
- string_len += sizeof(char);
-
- /* Make sure that the source string fit into the buffer. */
- KBASE_DEBUG_ASSERT(string_len <= max_write_size);
-
- /* Update string length. */
- memcpy(&buffer[pos], &string_len, sizeof(string_len));
-
- return pos + sizeof(string_len) + string_len;
-}
-
-/**
- * kbasep_tlstream_write_timestamp - write timestamp to message buffer
- * @buffer: buffer where data will be written
- * @pos: position in the buffer where to place data
- *
- * Return: updated position in the buffer
- */
-static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
-{
- u64 timestamp = kbasep_tlstream_get_timestamp();
-
- return kbasep_tlstream_write_bytes(
- buffer, pos,
- &timestamp, sizeof(timestamp));
-}
-
-/**
- * kbasep_tlstream_put_bits - put bits in a word
- * @word: pointer to the words being modified
- * @value: value that shall be written to given position
- * @bitpos: position where value shall be written (in bits)
- * @bitlen: length of value (in bits)
- */
-static void kbasep_tlstream_put_bits(
- u32 *word,
- u32 value,
- unsigned int bitpos,
- unsigned int bitlen)
-{
- const u32 mask = ((1 << bitlen) - 1) << bitpos;
-
- KBASE_DEBUG_ASSERT(word);
- KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
- KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
-
- *word &= ~mask;
- *word |= ((value << bitpos) & mask);
-}
-
-/**
- * kbasep_tlstream_packet_header_setup - setup the packet header
- * @buffer: pointer to the buffer
- * @pkt_family: packet's family
- * @pkt_type: packet's type
- * @pkt_class: packet's class
- * @stream_id: stream id
- * @numbered: non-zero if this stream is numbered
- *
- * Function sets up immutable part of packet header in the given buffer.
- */ -static void kbasep_tlstream_packet_header_setup( - char *buffer, - enum tl_packet_family pkt_family, - enum tl_packet_class pkt_class, - enum tl_packet_type pkt_type, - unsigned int stream_id, - int numbered) -{ - u32 word0 = 0; - u32 word1 = 0; - - KBASE_DEBUG_ASSERT(buffer); - KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL); - KBASE_DEBUG_ASSERT( - (pkt_type == TL_PACKET_TYPE_HEADER) || - (pkt_type == TL_PACKET_TYPE_SUMMARY) || - (pkt_type == TL_PACKET_TYPE_BODY)); - KBASE_DEBUG_ASSERT( - (pkt_class == TL_PACKET_CLASS_OBJ) || - (pkt_class == TL_PACKET_CLASS_AUX)); - - kbasep_tlstream_put_bits( - &word0, pkt_family, - PACKET_FAMILY_POS, PACKET_FAMILY_LEN); - kbasep_tlstream_put_bits( - &word0, pkt_class, - PACKET_CLASS_POS, PACKET_CLASS_LEN); - kbasep_tlstream_put_bits( - &word0, pkt_type, - PACKET_TYPE_POS, PACKET_TYPE_LEN); - kbasep_tlstream_put_bits( - &word0, stream_id, - PACKET_STREAMID_POS, PACKET_STREAMID_LEN); - - if (numbered) - kbasep_tlstream_put_bits( - &word1, 1, - PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN); - - memcpy(&buffer[0], &word0, sizeof(word0)); - memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); -} - -/** - * kbasep_tlstream_packet_header_update - update the packet header - * @buffer: pointer to the buffer - * @data_size: amount of data carried in this packet - * - * Function updates mutable part of packet header in the given buffer. - * Note that value of data_size must not including size of the header. - */ -static void kbasep_tlstream_packet_header_update( - char *buffer, - size_t data_size) -{ - u32 word0; - u32 word1; - - KBASE_DEBUG_ASSERT(buffer); - CSTD_UNUSED(word0); - - memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1)); - - kbasep_tlstream_put_bits( - &word1, data_size, - PACKET_LENGTH_POS, PACKET_LENGTH_LEN); - - memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); -} - -/** - * kbasep_tlstream_packet_number_update - update the packet number - * @buffer: pointer to the buffer - * @counter: value of packet counter for this packet's stream - * - * Function updates packet number embedded within the packet placed in the - * given buffer. - */ -static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter) -{ - KBASE_DEBUG_ASSERT(buffer); - - memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); -} - -/** - * kbasep_timeline_stream_reset - reset stream - * @stream: pointer to the stream structure - * - * Function discards all pending messages and resets packet counters. - */ -static void kbasep_timeline_stream_reset(struct tl_stream *stream) -{ - unsigned int i; - - for (i = 0; i < PACKET_COUNT; i++) { - if (stream->numbered) - atomic_set( - &stream->buffer[i].size, - PACKET_HEADER_SIZE + - PACKET_NUMBER_SIZE); - else - atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); - } - - atomic_set(&stream->wbi, 0); - atomic_set(&stream->rbi, 0); -} - -/** - * kbasep_timeline_stream_init - initialize timeline stream - * @stream: pointer to the stream structure - * @stream_type: stream type - */ -static void kbasep_timeline_stream_init( - struct tl_stream *stream, - enum tl_stream_type stream_type) -{ - unsigned int i; - - KBASE_DEBUG_ASSERT(stream); - KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); - - spin_lock_init(&stream->lock); - - /* All packets carrying tracepoints shall be numbered. 
*/ - if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) - stream->numbered = 1; - else - stream->numbered = 0; - - for (i = 0; i < PACKET_COUNT; i++) - kbasep_tlstream_packet_header_setup( - stream->buffer[i].data, - tl_stream_cfg[stream_type].pkt_family, - tl_stream_cfg[stream_type].pkt_class, - tl_stream_cfg[stream_type].pkt_type, - tl_stream_cfg[stream_type].stream_id, - stream->numbered); - - kbasep_timeline_stream_reset(tl_stream[stream_type]); -} - -/** - * kbasep_timeline_stream_term - terminate timeline stream - * @stream: pointer to the stream structure - */ -static void kbasep_timeline_stream_term(struct tl_stream *stream) -{ - KBASE_DEBUG_ASSERT(stream); -} - -/** - * kbasep_tlstream_msgbuf_submit - submit packet to the user space - * @stream: pointer to the stream structure - * @wb_idx_raw: write buffer index - * @wb_size: length of data stored in current buffer - * - * Function updates currently written buffer with packet header. Then write - * index is incremented and buffer is handled to user space. Parameters - * of new buffer are returned using provided arguments. - * - * Return: length of data in new buffer - * - * Warning: User must update the stream structure with returned value. - */ -static size_t kbasep_tlstream_msgbuf_submit( - struct tl_stream *stream, - unsigned int wb_idx_raw, - unsigned int wb_size) -{ - unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; - - /* Set stream as flushed. */ - atomic_set(&stream->autoflush_counter, -1); - - kbasep_tlstream_packet_header_update( - stream->buffer[wb_idx].data, - wb_size - PACKET_HEADER_SIZE); - - if (stream->numbered) - kbasep_tlstream_packet_number_update( - stream->buffer[wb_idx].data, - wb_idx_raw); - - /* Increasing write buffer index will expose this packet to the reader. - * As stream->lock is not taken on reader side we must make sure memory - * is updated correctly before this will happen. */ - smp_wmb(); - atomic_inc(&stream->wbi); - - /* Inform user that packets are ready for reading. */ - wake_up_interruptible(&tl_event_queue); - - wb_size = PACKET_HEADER_SIZE; - if (stream->numbered) - wb_size += PACKET_NUMBER_SIZE; - - return wb_size; -} - -/** - * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer - * @stream_type: type of the stream that shall be locked - * @msg_size: message size - * @flags: pointer to store flags passed back on stream release - * - * Function will lock the stream and reserve the number of bytes requested - * in msg_size for the user. - * - * Return: pointer to the buffer where message can be stored - * - * Warning: Stream must be released with kbasep_tlstream_msgbuf_release(). - * Only atomic operations are allowed while stream is locked - * (i.e. do not use any operation that may sleep). - */ -static char *kbasep_tlstream_msgbuf_acquire( - enum tl_stream_type stream_type, - size_t msg_size, - unsigned long *flags) __acquires(&stream->lock) -{ - struct tl_stream *stream; - unsigned int wb_idx_raw; - unsigned int wb_idx; - size_t wb_size; - - KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); - KBASE_DEBUG_ASSERT( - PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= - msg_size); - - stream = tl_stream[stream_type]; - - spin_lock_irqsave(&stream->lock, *flags); - - wb_idx_raw = atomic_read(&stream->wbi); - wb_idx = wb_idx_raw % PACKET_COUNT; - wb_size = atomic_read(&stream->buffer[wb_idx].size); - - /* Select next buffer if data will not fit into current one. 
*/ - if (PACKET_SIZE < wb_size + msg_size) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - } - - /* Reserve space in selected buffer. */ - atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); - -#if MALI_UNIT_TEST - atomic_add(msg_size, &tlstream_bytes_generated); -#endif /* MALI_UNIT_TEST */ - - return &stream->buffer[wb_idx].data[wb_size]; -} - -/** - * kbasep_tlstream_msgbuf_release - unlock selected stream - * @stream_type: type of the stream that shall be locked - * @flags: value obtained during stream acquire - * - * Function releases stream that has been previously locked with a call to - * kbasep_tlstream_msgbuf_acquire(). - */ -static void kbasep_tlstream_msgbuf_release( - enum tl_stream_type stream_type, - unsigned long flags) __releases(&stream->lock) -{ - struct tl_stream *stream; - - KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); - - stream = tl_stream[stream_type]; - - /* Mark stream as containing unflushed data. */ - atomic_set(&stream->autoflush_counter, 0); - - spin_unlock_irqrestore(&stream->lock, flags); -} - -/*****************************************************************************/ - -/** - * kbasep_tlstream_flush_stream - flush stream - * @stype: type of stream to be flushed - * - * Flush pending data in timeline stream. - */ -static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) -{ - struct tl_stream *stream = tl_stream[stype]; - unsigned long flags; - unsigned int wb_idx_raw; - unsigned int wb_idx; - size_t wb_size; - size_t min_size = PACKET_HEADER_SIZE; - - if (stream->numbered) - min_size += PACKET_NUMBER_SIZE; - - spin_lock_irqsave(&stream->lock, flags); - - wb_idx_raw = atomic_read(&stream->wbi); - wb_idx = wb_idx_raw % PACKET_COUNT; - wb_size = atomic_read(&stream->buffer[wb_idx].size); - - if (wb_size > min_size) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, wb_size); - } - spin_unlock_irqrestore(&stream->lock, flags); -} - -/** - * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback - * @timer: unused - * - * Timer is executed periodically to check if any of the stream contains - * buffer ready to be submitted to user space. - */ -static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer) -{ - enum tl_stream_type stype; - int rcode; - - CSTD_UNUSED(timer); - - for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { - struct tl_stream *stream = tl_stream[stype]; - unsigned long flags; - unsigned int wb_idx_raw; - unsigned int wb_idx; - size_t wb_size; - size_t min_size = PACKET_HEADER_SIZE; - - int af_cnt = atomic_read(&stream->autoflush_counter); - - /* Check if stream contain unflushed data. */ - if (0 > af_cnt) - continue; - - /* Check if stream should be flushed now. */ - if (af_cnt != atomic_cmpxchg( - &stream->autoflush_counter, - af_cnt, - af_cnt + 1)) - continue; - if (!af_cnt) - continue; - - /* Autoflush this stream. 
*/ - if (stream->numbered) - min_size += PACKET_NUMBER_SIZE; - - spin_lock_irqsave(&stream->lock, flags); - - wb_idx_raw = atomic_read(&stream->wbi); - wb_idx = wb_idx_raw % PACKET_COUNT; - wb_size = atomic_read(&stream->buffer[wb_idx].size); - - if (wb_size > min_size) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, - wb_size); - } - spin_unlock_irqrestore(&stream->lock, flags); - } - - if (atomic_read(&autoflush_timer_active)) - rcode = mod_timer( - &autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); -} - -/** - * kbasep_tlstream_packet_pending - check timeline streams for pending packets - * @stype: pointer to variable where stream type will be placed - * @rb_idx_raw: pointer to variable where read buffer index will be placed - * - * Function checks all streams for pending packets. It will stop as soon as - * packet ready to be submitted to user space is detected. Variables under - * pointers, passed as the parameters to this function will be updated with - * values pointing to right stream and buffer. - * - * Return: non-zero if any of timeline streams has at last one packet ready - */ -static int kbasep_tlstream_packet_pending( - enum tl_stream_type *stype, - unsigned int *rb_idx_raw) -{ - int pending = 0; - - KBASE_DEBUG_ASSERT(stype); - KBASE_DEBUG_ASSERT(rb_idx_raw); - - for ( - *stype = 0; - (*stype < TL_STREAM_TYPE_COUNT) && !pending; - (*stype)++) { - if (NULL != tl_stream[*stype]) { - *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi); - /* Read buffer index may be updated by writer in case of - * overflow. Read and write buffer indexes must be - * loaded in correct order. */ - smp_rmb(); - if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw) - pending = 1; - } - } - (*stype)--; - - return pending; -} - -/** - * kbasep_tlstream_read - copy data from streams to buffer provided by user - * @filp: pointer to file structure (unused) - * @buffer: pointer to the buffer provided by user - * @size: maximum amount of data that can be stored in the buffer - * @f_pos: pointer to file offset (unused) - * - * Return: number of bytes stored in the buffer - */ -static ssize_t kbasep_tlstream_read( - struct file *filp, - char __user *buffer, - size_t size, - loff_t *f_pos) -{ - ssize_t copy_len = 0; - - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(f_pos); - - if (!buffer) - return -EINVAL; - - if ((0 > *f_pos) || (PACKET_SIZE > size)) - return -EINVAL; - - mutex_lock(&tl_reader_lock); - - while (copy_len < size) { - enum tl_stream_type stype; - unsigned int rb_idx_raw = 0; - unsigned int wb_idx_raw; - unsigned int rb_idx; - size_t rb_size; - - /* If we don't have any data yet, wait for packet to be - * submitted. If we already read some packets and there is no - * packet pending return back to user. */ - if (0 < copy_len) { - if (!kbasep_tlstream_packet_pending( - &stype, - &rb_idx_raw)) - break; - } else { - if (wait_event_interruptible( - tl_event_queue, - kbasep_tlstream_packet_pending( - &stype, - &rb_idx_raw))) { - copy_len = -ERESTARTSYS; - break; - } - } - - /* Check if this packet fits into the user buffer. - * If so copy its content. 
*/ - rb_idx = rb_idx_raw % PACKET_COUNT; - rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size); - if (rb_size > size - copy_len) - break; - if (copy_to_user( - &buffer[copy_len], - tl_stream[stype]->buffer[rb_idx].data, - rb_size)) { - copy_len = -EFAULT; - break; - } - - /* If the distance between read buffer index and write - * buffer index became more than PACKET_COUNT, then overflow - * happened and we need to ignore the last portion of bytes - * that we have just sent to user. - */ - smp_rmb(); - wb_idx_raw = atomic_read(&tl_stream[stype]->wbi); - - if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { - copy_len += rb_size; - atomic_inc(&tl_stream[stype]->rbi); -#if MALI_UNIT_TEST - atomic_add(rb_size, &tlstream_bytes_collected); -#endif /* MALI_UNIT_TEST */ - - } else { - const unsigned int new_rb_idx_raw = - wb_idx_raw - PACKET_COUNT + 1; - /* Adjust read buffer index to the next valid buffer */ - atomic_set(&tl_stream[stype]->rbi, new_rb_idx_raw); - } - } - - mutex_unlock(&tl_reader_lock); - - return copy_len; -} - -/** - * kbasep_tlstream_poll - poll timeline stream for packets - * @filp: pointer to file structure - * @wait: pointer to poll table - * Return: POLLIN if data can be read without blocking, otherwise zero - */ -static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait) -{ - enum tl_stream_type stream_type; - unsigned int rb_idx; - - KBASE_DEBUG_ASSERT(filp); - KBASE_DEBUG_ASSERT(wait); - - poll_wait(filp, &tl_event_queue, wait); - if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx)) - return POLLIN; - return 0; -} - -/** - * kbasep_tlstream_release - release timeline stream descriptor - * @inode: pointer to inode structure - * @filp: pointer to file structure - * - * Return always return zero - */ -static int kbasep_tlstream_release(struct inode *inode, struct file *filp) -{ - KBASE_DEBUG_ASSERT(inode); - KBASE_DEBUG_ASSERT(filp); - CSTD_UNUSED(inode); - CSTD_UNUSED(filp); - - /* Stop autoflush timer before releasing access to streams. */ - atomic_set(&autoflush_timer_active, 0); - del_timer_sync(&autoflush_timer); - - atomic_set(&kbase_tlstream_enabled, 0); - return 0; -} - -/** - * kbasep_tlstream_timeline_header - prepare timeline header stream packet - * @stream_type: type of the stream that will carry header data - * @tp_desc: pointer to array with tracepoint descriptors - * @tp_count: number of descriptors in the given array - * - * Functions fills in information about tracepoints stored in body stream - * associated with this header stream. - */ -static void kbasep_tlstream_timeline_header( - enum tl_stream_type stream_type, - const struct tp_desc *tp_desc, - u32 tp_count) -{ - const u8 tv = SWTRACE_VERSION; /* protocol version */ - const u8 ps = sizeof(void *); /* pointer size */ - size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count); - char *buffer; - size_t pos = 0; - unsigned long flags; - unsigned int i; - - KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); - KBASE_DEBUG_ASSERT(tp_desc); - - /* Calculate the size of the timeline message. 
*/ - for (i = 0; i < tp_count; i++) { - msg_size += sizeof(tp_desc[i].id); - msg_size += - strnlen(tp_desc[i].id_str, STRLEN_MAX) + - sizeof(char) + sizeof(u32); - msg_size += - strnlen(tp_desc[i].name, STRLEN_MAX) + - sizeof(char) + sizeof(u32); - msg_size += - strnlen(tp_desc[i].arg_types, STRLEN_MAX) + - sizeof(char) + sizeof(u32); - msg_size += - strnlen(tp_desc[i].arg_names, STRLEN_MAX) + - sizeof(char) + sizeof(u32); - } - - KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size); - - buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv)); - pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &tp_count, sizeof(tp_count)); - - for (i = 0; i < tp_count; i++) { - pos = kbasep_tlstream_write_bytes( - buffer, pos, - &tp_desc[i].id, sizeof(tp_desc[i].id)); - pos = kbasep_tlstream_write_string( - buffer, pos, - tp_desc[i].id_str, msg_size - pos); - pos = kbasep_tlstream_write_string( - buffer, pos, - tp_desc[i].name, msg_size - pos); - pos = kbasep_tlstream_write_string( - buffer, pos, - tp_desc[i].arg_types, msg_size - pos); - pos = kbasep_tlstream_write_string( - buffer, pos, - tp_desc[i].arg_names, msg_size - pos); - } - - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(stream_type, flags); - - /* We don't expect any more data to be read in this stream. - * As header stream must be read before its associated body stream, - * make this packet visible to the user straightaway. */ - kbasep_tlstream_flush_stream(stream_type); -} - -/*****************************************************************************/ - -int kbase_tlstream_init(void) -{ - enum tl_stream_type i; - - /* Prepare stream structures. */ - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { - tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL); - if (!tl_stream[i]) - break; - kbasep_timeline_stream_init(tl_stream[i], i); - } - if (TL_STREAM_TYPE_COUNT > i) { - for (; i > 0; i--) { - kbasep_timeline_stream_term(tl_stream[i - 1]); - kfree(tl_stream[i - 1]); - } - return -ENOMEM; - } - - /* Initialize autoflush timer. */ - atomic_set(&autoflush_timer_active, 0); - kbase_timer_setup(&autoflush_timer, - kbasep_tlstream_autoflush_timer_callback); - - return 0; -} - -void kbase_tlstream_term(void) -{ - enum tl_stream_type i; - - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { - kbasep_timeline_stream_term(tl_stream[i]); - kfree(tl_stream[i]); - } -} - -static void kbase_create_timeline_objects(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - unsigned int lpu_id; - unsigned int as_nr; - struct kbasep_kctx_list_element *element; - - /* Create LPU objects. */ - for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - u32 *lpu = - &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu); - } - - /* Create Address Space objects. */ - for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr); - - /* Create GPU object and make it retain all LPUs and address spaces. 
*/ - KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU( - kbdev, - kbdev->gpu_props.props.raw_props.gpu_id, - kbdev->gpu_props.num_cores); - - for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { - void *lpu = - &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; - KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev); - } - for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) - KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU( - &kbdev->as[as_nr], - kbdev); - - /* Create object for each known context. */ - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry(element, &kbdev->kctx_list, link) { - KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX( - element->kctx, - element->kctx->id, - (u32)(element->kctx->tgid)); - } - /* Before releasing the lock, reset body stream buffers. - * This will prevent context creation message to be directed to both - * summary and body stream. - */ - kbase_tlstream_reset_body_streams(); - mutex_unlock(&kbdev->kctx_list_lock); - /* Static object are placed into summary packet that needs to be - * transmitted first. Flush all streams to make it available to - * user space. - */ - kbase_tlstream_flush_streams(); -} - -int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags) -{ - int ret; - u32 tlstream_enabled = TLSTREAM_ENABLED | flags; - - if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) { - int rcode; - - ret = anon_inode_getfd( - "[mali_tlstream]", - &kbasep_tlstream_fops, - kctx, - O_RDONLY | O_CLOEXEC); - if (ret < 0) { - atomic_set(&kbase_tlstream_enabled, 0); - return ret; - } - - /* Reset and initialize header streams. */ - kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_OBJ_HEADER]); - kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]); - kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_AUX_HEADER]); - kbasep_tlstream_timeline_header( - TL_STREAM_TYPE_OBJ_HEADER, - tp_desc_obj, - ARRAY_SIZE(tp_desc_obj)); - kbasep_tlstream_timeline_header( - TL_STREAM_TYPE_AUX_HEADER, - tp_desc_aux, - ARRAY_SIZE(tp_desc_aux)); - - /* Start autoflush timer. */ - atomic_set(&autoflush_timer_active, 1); - rcode = mod_timer( - &autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); - - /* If job dumping is enabled, readjust the software event's - * timeout as the default value of 3 seconds is often - * insufficient. */ - if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { - dev_info(kctx->kbdev->dev, - "Job dumping is enabled, readjusting the software event's timeout\n"); - atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms, - 1800000); - } - - /* Summary stream was cleared during acquire. - * Create static timeline objects that will be - * read by client. 
- */ - kbase_create_timeline_objects(kctx); - - } else { - ret = -EBUSY; - } - - return ret; -} - -void kbase_tlstream_flush_streams(void) -{ - enum tl_stream_type stype; - - for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) - kbasep_tlstream_flush_stream(stype); -} - -void kbase_tlstream_reset_body_streams(void) -{ - kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_OBJ]); - kbasep_timeline_stream_reset( - tl_stream[TL_STREAM_TYPE_AUX]); -} - -#if MALI_UNIT_TEST -void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) -{ - KBASE_DEBUG_ASSERT(bytes_collected); - KBASE_DEBUG_ASSERT(bytes_generated); - *bytes_collected = atomic_read(&tlstream_bytes_collected); - *bytes_generated = atomic_read(&tlstream_bytes_generated); -} -#endif /* MALI_UNIT_TEST */ - -/*****************************************************************************/ - -void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) -{ - const u32 msg_id = KBASE_TL_NEW_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + - sizeof(tgid); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ_SUMMARY, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &nr, sizeof(nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &tgid, sizeof(tgid)); - - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -} - -void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) -{ - const u32 msg_id = KBASE_TL_NEW_GPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) + - sizeof(core_count); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ_SUMMARY, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &id, sizeof(id)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &core_count, sizeof(core_count)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -} - -void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) -{ - const u32 msg_id = KBASE_TL_NEW_LPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) + - sizeof(fn); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ_SUMMARY, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &nr, sizeof(nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &fn, sizeof(fn)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -} - -void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void 
*gpu) -{ - const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ_SUMMARY, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -} - -void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) -{ - const u32 msg_id = KBASE_TL_NEW_AS; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ_SUMMARY, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &nr, sizeof(nr)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -} - -void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) -{ - const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ_SUMMARY, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); -} - -/*****************************************************************************/ - -void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) -{ - const u32 msg_id = KBASE_TL_NEW_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + - sizeof(tgid); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &nr, sizeof(nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &tgid, sizeof(tgid)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) -{ - const u32 msg_id = KBASE_TL_NEW_ATOM; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) + - sizeof(nr); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = 
kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &nr, sizeof(nr)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_del_ctx(void *context) -{ - const u32 msg_id = KBASE_TL_DEL_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_del_atom(void *atom) -{ - const u32 msg_id = KBASE_TL_DEL_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) -{ - const u32 msg_id = KBASE_TL_RET_CTX_LPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) -{ - const u32 msg_id = KBASE_TL_RET_ATOM_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_ret_atom_lpu( - void *atom, void *lpu, const char *attrib_match_list) -{ - const u32 msg_id = KBASE_TL_RET_ATOM_LPU; - const size_t msg_s0 = sizeof(u32) + sizeof(char) + - strnlen(attrib_match_list, STRLEN_MAX); - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + - sizeof(atom) + sizeof(lpu) + msg_s0; 
- unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - pos = kbasep_tlstream_write_string( - buffer, pos, attrib_match_list, msg_s0); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) -{ - const u32 msg_id = KBASE_TL_NRET_CTX_LPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) -{ - const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &context, sizeof(context)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) -{ - const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) -{ - const u32 msg_id = KBASE_TL_RET_AS_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, 
&ctx, sizeof(ctx)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) -{ - const u32 msg_id = KBASE_TL_NRET_AS_CTX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &ctx, sizeof(ctx)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) -{ - const u32 msg_id = KBASE_TL_RET_ATOM_AS; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) -{ - const u32 msg_id = KBASE_TL_NRET_ATOM_AS; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + - sizeof(jd) + sizeof(affinity) + sizeof(config); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &jd, sizeof(jd)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &affinity, sizeof(affinity)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &config, sizeof(config)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + 
sizeof(atom) + sizeof(prio); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &prio, sizeof(prio)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &state, sizeof(state)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr, u64 va_pages) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) - + sizeof(edit_addr) + sizeof(new_addr) + sizeof(va_pages); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &edit_addr, sizeof(edit_addr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &new_addr, sizeof(new_addr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &va_pages, sizeof(va_pages)); - - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_jitallocinfo( - void *atom, u64 va_pages, u64 commit_pages, u64 extent, - u32 jit_id, u32 bin_id, u32 max_allocations, u32 jit_flags, - u32 usage_id) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + - sizeof(va_pages) + sizeof(commit_pages) + - sizeof(extent) + sizeof(jit_id) + - sizeof(bin_id) + sizeof(max_allocations) + - sizeof(jit_flags) + sizeof(usage_id); - 
unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, - sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &va_pages, sizeof(va_pages)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &commit_pages, sizeof(commit_pages)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &extent, sizeof(extent)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &jit_id, sizeof(jit_id)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &bin_id, sizeof(bin_id)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &max_allocations, - sizeof(max_allocations)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &jit_flags, sizeof(jit_flags)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &usage_id, sizeof(usage_id)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id) -{ - const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(jit_id); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, - sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &jit_id, sizeof(jit_id)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - - -void __kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg) -{ - const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(as) + - sizeof(transtab) + sizeof(memattr) + sizeof(transcfg); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &as, sizeof(as)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &transtab, sizeof(transtab)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &memattr, sizeof(memattr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &transcfg, sizeof(transcfg)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_event_lpu_softstop(void *lpu) -{ - const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(lpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &lpu, sizeof(lpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - 
kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom) -{ - const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom) -{ - const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_event_atom_softjob_start(void *atom) -{ - const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_event_atom_softjob_end(void *atom) -{ - const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom, sizeof(atom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) -{ - const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -/*****************************************************************************/ - -void 
__kbase_tlstream_aux_pm_state(u32 core_type, u64 state) -{ - const u32 msg_id = KBASE_AUX_PM_STATE; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + - sizeof(state); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &core_type, sizeof(core_type)); - pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) -{ - const u32 msg_id = KBASE_AUX_PAGEFAULT; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + - sizeof(page_count_change); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, - &page_count_change, sizeof(page_count_change)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) -{ - const u32 msg_id = KBASE_AUX_PAGESALLOC; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + - sizeof(page_count); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &page_count, sizeof(page_count)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_devfreq_target(u64 target_freq) -{ - const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(target_freq); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &target_freq, sizeof(target_freq)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_protected_enter_start(void *gpu) -{ - const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = 
kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} -void __kbase_tlstream_aux_protected_enter_end(void *gpu) -{ - const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_protected_leave_start(void *gpu) -{ - const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} -void __kbase_tlstream_aux_protected_leave_end(void *gpu) -{ - const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bid, - u32 max_allocs, u32 allocs, - u32 va_pages, u32 ph_pages) -{ - const u32 msg_id = KBASE_AUX_JIT_STATS; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) + - sizeof(ctx_nr) + sizeof(bid) + - sizeof(max_allocs) + sizeof(allocs) + - sizeof(va_pages) + sizeof(ph_pages); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &ctx_nr, sizeof(ctx_nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &bid, sizeof(bid)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &max_allocs, sizeof(max_allocs)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &allocs, sizeof(allocs)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &va_pages, sizeof(va_pages)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &ph_pages, sizeof(ph_pages)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_tlstream.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_tlstream.h deleted file mode 100755 
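For reference, every tracepoint writer removed above from mali_kbase_tlstream.c follows the same serialization pattern: reserve space in the per-type stream, write the message id and a timestamp, copy each argument as raw bytes, check the running position against the precomputed size, then release the buffer. A minimal sketch of that shape, not part of the patch itself; the kbasep_tlstream_* helpers and TL_STREAM_TYPE_OBJ are the names visible in the deleted hunks, while the wrapper function is invented for illustration:

static void example_obj_event_with_one_pointer(u32 msg_id, void *obj)
{
	/* Size is computed up front: id + 64-bit timestamp + payload. */
	const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(obj);
	unsigned long flags;
	char *buffer;
	size_t pos = 0;

	/* Reserve msg_size bytes in the object stream's ring buffer. */
	buffer = kbasep_tlstream_msgbuf_acquire(TL_STREAM_TYPE_OBJ,
						msg_size, &flags);
	KBASE_DEBUG_ASSERT(buffer);

	/* Fixed prefix: message id, then timestamp. */
	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
	pos = kbasep_tlstream_write_timestamp(buffer, pos);
	/* Payload: each argument copied verbatim, in declaration order. */
	pos = kbasep_tlstream_write_bytes(buffer, pos, &obj, sizeof(obj));
	KBASE_DEBUG_ASSERT(msg_size == pos);

	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
}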
index e2a3ea46a871..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_tlstream.h +++ /dev/null @@ -1,680 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#if !defined(_KBASE_TLSTREAM_H) -#define _KBASE_TLSTREAM_H - -#include - -/*****************************************************************************/ - -/** - * kbase_tlstream_init - initialize timeline infrastructure in kernel - * Return: zero on success, negative number on error - */ -int kbase_tlstream_init(void); - -/** - * kbase_tlstream_term - terminate timeline infrastructure in kernel - * - * Timeline need have to been previously enabled with kbase_tlstream_init(). - */ -void kbase_tlstream_term(void); - -/** - * kbase_tlstream_acquire - acquire timeline stream file descriptor - * @kctx: kernel common context - * @flags: timeline stream flags - * - * This descriptor is meant to be used by userspace timeline to gain access to - * kernel timeline stream. This stream is later broadcasted by user space to the - * timeline client. - * Only one entity can own the descriptor at any given time. Descriptor shall be - * closed if unused. If descriptor cannot be obtained (i.e. when it is already - * being used) return will be a negative value. - * - * Return: file descriptor on success, negative number on error - */ -int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags); - -/** - * kbase_tlstream_flush_streams - flush timeline streams. - * - * Function will flush pending data in all timeline streams. - */ -void kbase_tlstream_flush_streams(void); - -/** - * kbase_tlstream_reset_body_streams - reset timeline body streams. - * - * Function will discard pending data in all timeline body streams. - */ -void kbase_tlstream_reset_body_streams(void); - -#if MALI_UNIT_TEST -/** - * kbase_tlstream_test - start timeline stream data generator - * @tpw_count: number of trace point writers in each context - * @msg_delay: time delay in milliseconds between trace points written by one - * writer - * @msg_count: number of trace points written by one writer - * @aux_msg: if non-zero aux messages will be included - * - * This test starts a requested number of asynchronous writers in both IRQ and - * thread context. Each writer will generate required number of test - * tracepoints (tracepoints with embedded information about writer that - * should be verified by user space reader). Tracepoints will be emitted in - * all timeline body streams. If aux_msg is non-zero writer will also - * generate not testable tracepoints (tracepoints without information about - * writer). These tracepoints are used to check correctness of remaining - * timeline message generating functions. Writer will wait requested time - * between generating another set of messages. 
This call blocks until all - * writers finish. - */ -void kbase_tlstream_test( - unsigned int tpw_count, - unsigned int msg_delay, - unsigned int msg_count, - int aux_msg); - -/** - * kbase_tlstream_stats - read timeline stream statistics - * @bytes_collected: will hold number of bytes read by the user - * @bytes_generated: will hold number of bytes generated by trace points - */ -void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); -#endif /* MALI_UNIT_TEST */ - -/*****************************************************************************/ - -#define TL_ATOM_STATE_IDLE 0 -#define TL_ATOM_STATE_READY 1 -#define TL_ATOM_STATE_DONE 2 -#define TL_ATOM_STATE_POSTED 3 - -void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); -void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); -void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); -void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); -void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); -void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); -void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); -void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); -void __kbase_tlstream_tl_del_ctx(void *context); -void __kbase_tlstream_tl_del_atom(void *atom); -void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); -void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); -void __kbase_tlstream_tl_ret_atom_lpu( - void *atom, void *lpu, const char *attrib_match_list); -void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); -void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); -void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); -void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); -void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); -void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); -void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); -void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config); -void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio); -void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state); -void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom); -void __kbase_tlstream_tl_attrib_atom_jit( - void *atom, u64 edit_addr, u64 new_addr, u64 va_pages); -void __kbase_tlstream_tl_attrib_atom_jitallocinfo( - void *atom, u64 va_pages, u64 commit_pages, u64 extent, - u32 jit_id, u32 bin_id, u32 max_allocations, u32 flags, - u32 usage_id); -void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(void *atom, u32 jit_id); -void __kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg); -void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom); -void __kbase_tlstream_tl_event_lpu_softstop(void *lpu); -void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom); -void __kbase_tlstream_tl_event_atom_softjob_start(void *atom); -void __kbase_tlstream_tl_event_atom_softjob_end(void *atom); -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); -void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); -void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); -void 
__kbase_tlstream_aux_devfreq_target(u64 target_freq); -void __kbase_tlstream_aux_protected_enter_start(void *gpu); -void __kbase_tlstream_aux_protected_enter_end(void *gpu); -void __kbase_tlstream_aux_protected_leave_start(void *gpu); -void __kbase_tlstream_aux_protected_leave_end(void *gpu); -void __kbase_tlstream_aux_jit_stats(u32 ctx_nr, u32 bin_id, - u32 max_allocations, u32 allocations, - u32 va_pages_nr, u32 ph_pages_nr); - -#define TLSTREAM_ENABLED (1 << 31) - -extern atomic_t kbase_tlstream_enabled; - -#define __TRACE_IF_ENABLED(trace_name, ...) \ - do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ - if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ - } while (0) - -#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...) \ - do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ - if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ - } while (0) - -#define __TRACE_IF_ENABLED_JD(trace_name, ...) \ - do { \ - int enabled = atomic_read(&kbase_tlstream_enabled); \ - if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ - __kbase_tlstream_##trace_name(__VA_ARGS__); \ - } while (0) - -/*****************************************************************************/ - -/** - * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline - * summary - * @context: name of the context object - * @nr: context number - * @tgid: thread Group Id - * - * Function emits a timeline message informing about context creation. Context - * is created with context number (its attribute), that can be used to link - * kbase context with userspace context. - * This message is directed to timeline summary stream. - */ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \ - __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) - -/** - * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary - * @gpu: name of the GPU object - * @id: id value of this GPU - * @core_count: number of cores this GPU hosts - * - * Function emits a timeline message informing about GPU creation. GPU is - * created with two attributes: id and core count. - * This message is directed to timeline summary stream. - */ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \ - __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) - -/** - * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary - * @lpu: name of the Logical Processing Unit object - * @nr: sequential number assigned to this LPU - * @fn: property describing this LPU's functional abilities - * - * Function emits a timeline message informing about LPU creation. LPU is - * created with two attributes: number linking this LPU with GPU's job slot - * and function bearing information about this LPU abilities. - * This message is directed to timeline summary stream. - */ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \ - __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) - -/** - * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU - * @lpu: name of the Logical Processing Unit object - * @gpu: name of the GPU object - * - * Function emits a timeline message informing that LPU object shall be deleted - * along with GPU object. - * This message is directed to timeline summary stream. 
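For reference, the KBASE_TLSTREAM_* wrappers in this header route through the __TRACE_IF_ENABLED* macros above, so a disabled timeline costs callers only an atomic_read(). A minimal, hypothetical call site (the function and its arguments are invented; the macro, kbase_tlstream_enabled and TLSTREAM_ENABLED are the ones defined above):

static void example_announce_gpu(void *gpu, u32 id, u32 core_count)
{
	/*
	 * Expands roughly to:
	 *   if (atomic_read(&kbase_tlstream_enabled) & TLSTREAM_ENABLED)
	 *           __kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count);
	 */
	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count);
}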
- */ -#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \ - __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) - -/** - * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary - * @as: name of the address space object - * @nr: sequential number assigned to this address space - * - * Function emits a timeline message informing about address space creation. - * Address space is created with one attribute: number identifying this - * address space. - * This message is directed to timeline summary stream. - */ -#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \ - __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) - -/** - * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU - * @as: name of the address space object - * @gpu: name of the GPU object - * - * Function emits a timeline message informing that address space object - * shall be deleted along with GPU object. - * This message is directed to timeline summary stream. - */ -#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \ - __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) - -/** - * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline - * @context: name of the context object - * @nr: context number - * @tgid: thread Group Id - * - * Function emits a timeline message informing about context creation. Context - * is created with context number (its attribute), that can be used to link - * kbase context with userspace context. - */ -#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \ - __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) - -/** - * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline - * @atom: name of the atom object - * @nr: sequential number assigned to this atom - * - * Function emits a timeline message informing about atom creation. Atom is - * created with atom number (its attribute) that links it with actual work - * bucket id understood by hardware. - */ -#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \ - __TRACE_IF_ENABLED(tl_new_atom, atom, nr) - -/** - * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline - * @context: name of the context object - * - * Function emits a timeline message informing that context object ceased to - * exist. - */ -#define KBASE_TLSTREAM_TL_DEL_CTX(context) \ - __TRACE_IF_ENABLED(tl_del_ctx, context) - -/** - * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline - * @atom: name of the atom object - * - * Function emits a timeline message informing that atom object ceased to - * exist. - */ -#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \ - __TRACE_IF_ENABLED(tl_del_atom, atom) - -/** - * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU - * @context: name of the context object - * @lpu: name of the Logical Processing Unit object - * - * Function emits a timeline message informing that context is being held - * by LPU and must not be deleted unless it is released. - */ -#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \ - __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) - -/** - * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context - * @atom: name of the atom object - * @context: name of the context object - * - * Function emits a timeline message informing that atom object is being held - * by context and must not be deleted unless it is released. 
- */ -#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \ - __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) - -/** - * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU - * @atom: name of the atom object - * @lpu: name of the Logical Processing Unit object - * @attrib_match_list: list containing match operator attributes - * - * Function emits a timeline message informing that atom object is being held - * by LPU and must not be deleted unless it is released. - */ -#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \ - __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) - -/** - * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU - * @context: name of the context object - * @lpu: name of the Logical Processing Unit object - * - * Function emits a timeline message informing that context is being released - * by LPU object. - */ -#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \ - __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) - -/** - * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context - * @atom: name of the atom object - * @context: name of the context object - * - * Function emits a timeline message informing that atom object is being - * released by context. - */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \ - __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) - -/** - * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU - * @atom: name of the atom object - * @lpu: name of the Logical Processing Unit object - * - * Function emits a timeline message informing that atom object is being - * released by LPU. - */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \ - __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) - -/** - * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context - * @as: name of the address space object - * @ctx: name of the context object - * - * Function emits a timeline message informing that address space object - * is being held by the context object. - */ -#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \ - __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) - -/** - * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context - * @as: name of the address space object - * @ctx: name of the context object - * - * Function emits a timeline message informing that address space object - * is being released by atom. - */ -#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \ - __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) - -/** - * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space - * @atom: name of the atom object - * @as: name of the address space object - * - * Function emits a timeline message informing that atom object is being held - * by address space and must not be deleted unless it is released. - */ -#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \ - __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) - -/** - * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space - * @atom: name of the atom object - * @as: name of the address space object - * - * Function emits a timeline message informing that atom object is being - * released by address space. - */ -#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \ - __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes - * @atom: name of the atom object - * @jd: job descriptor address - * @affinity: job affinity - * @config: job config - * - * Function emits a timeline message containing atom attributes. 
- */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \ - __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority - * @atom: name of the atom object - * @prio: atom priority - * - * Function emits a timeline message containing atom priority. - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state - * @atom: name of the atom object - * @state: atom state - * - * Function emits a timeline message containing atom state. - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom was prioritized - * @atom: name of the atom object - * - * Function emits a timeline message signalling priority change - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(atom) \ - __TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_prioritized, atom) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom - * @atom: atom identifier - * @edit_addr: address edited by jit - * @new_addr: address placed into the edited location - * @va_pages: maximum number of pages this jit can allocate - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr, va_pages) \ - __TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, \ - new_addr, va_pages) - -/** - * Information about the JIT allocation atom. - * - * @atom: Atom identifier. - * @va_pages: The minimum number of virtual pages required. - * @commit_pages: The minimum number of physical pages which - * should back the allocation. - * @extent: Granularity of physical pages to grow the - * allocation by during a fault. - * @jit_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - * @bin_id: The JIT allocation bin, used in conjunction with - * @max_allocations to limit the number of each - * type of JIT allocation. - * @max_allocations: The maximum number of allocations allowed within - * the bin specified by @bin_id. Should be the same - * for all JIT allocations within the same bin. - * @jit_flags: Flags specifying the special requirements for - * the JIT allocation. - * @usage_id: A hint about which allocation should be reused. - * The kernel should attempt to use a previous - * allocation with the same usage_id - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(atom, va_pages,\ - commit_pages, extent, jit_id, bin_id,\ - max_allocations, jit_flags, usage_id) \ - __TRACE_IF_ENABLED(tl_attrib_atom_jitallocinfo, atom, va_pages,\ - commit_pages, extent, jit_id, bin_id,\ - max_allocations, jit_flags, usage_id) - -/** - * Information about the JIT free atom. - * - * @atom: Atom identifier. - * @jit_id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - */ -#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(atom, jit_id) \ - __TRACE_IF_ENABLED(tl_attrib_atom_jitfreeinfo, atom, jit_id) - -/** - * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes - * @as: assigned address space - * @transtab: configuration of the TRANSTAB register - * @memattr: configuration of the MEMATTR register - * @transcfg: configuration of the TRANSCFG register (or zero if not present) - * - * Function emits a timeline message containing address space attributes. 
- */ -#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \ - __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - * @atom: atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom) - -/** - * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - * @lpu: name of the LPU object - */ -#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \ - __TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - * @atom: atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - * @atom: atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom) - -/** - * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - * @atom: atom identifier - */ -#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \ - __TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom) - -/** - * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset - * @gpu: name of the GPU object - * - * This imperative tracepoint is specific to job dumping. - * Function emits a timeline message indicating GPU soft reset. - */ -#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \ - __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) - - -/** - * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state - * @core_type: core type (shader, tiler, l2 cache, l3 cache) - * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) - */ -#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \ - __TRACE_IF_ENABLED(aux_pm_state, core_type, state) - -/** - * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event - * resulting in new pages being mapped - * @ctx_nr: kernel context number - * @page_count_change: number of pages to be added - */ -#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \ - __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) - -/** - * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated - * pages is changed - * @ctx_nr: kernel context number - * @page_count: number of pages used by the context - */ -#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \ - __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) - -/** - * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS - * frequency - * @target_freq: new target frequency - */ -#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \ - __TRACE_IF_ENABLED(aux_devfreq_target, target_freq) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning - * to protected mode - * @gpu: name of the GPU object - * - * Function emits a timeline message indicating the GPU is starting to - * transition to protected mode. - */ -#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning - * to protected mode - * @gpu: name of the GPU object - * - * Function emits a timeline message indicating the GPU has finished - * transitioning to protected mode. 
- */ -#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning - * to non-protected mode - * @gpu: name of the GPU object - * - * Function emits a timeline message indicating the GPU is starting to - * transition to non-protected mode. - */ -#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu) - -/** - * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning - * to non-protected mode - * @gpu: name of the GPU object - * - * Function emits a timeline message indicating the GPU has finished - * transitioning to non-protected mode. - */ -#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \ - __TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu) - -/** - * KBASE_TLSTREAM_AUX_JIT_STATS - JIT allocations per bin statistics - * - * @ctx_nr: kernel context number - * @bid: JIT bin id - * @max_allocs: maximum allocations allowed in this bin. - * UINT_MAX is a special value. It denotes that - * the parameter was not changed since the last time. - * @allocs: number of active allocations in this bin - * @va_pages: number of virtual pages allocated in this bin - * @ph_pages: number of physical pages allocated in this bin - * - * Function emits a timeline message indicating the JIT statistics - * for a given bin have chaned. - */ -#define KBASE_TLSTREAM_AUX_JIT_STATS(ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages) \ - __TRACE_IF_ENABLED(aux_jit_stats, ctx_nr, bid, \ - max_allocs, allocs, \ - va_pages, ph_pages) -#endif /* _KBASE_TLSTREAM_H */ - diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_trace_defs.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_trace_defs.h deleted file mode 100755 index 77fb8183a3d1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_trace_defs.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** - * ***** DO NOT INCLUDE DIRECTLY ***** - * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ - -/* - * The purpose of this header file is just to contain a list of trace code idenitifers - * - * Each identifier is wrapped in a macro, so that its string form and enum form can be created - * - * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block. - * - * This allows automatic creation of an enum and a corresponding array of strings - * - * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE. - * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE. 
- * - * e.g.: - * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X - * typedef enum - * { - * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X ) - * #include "mali_kbase_trace_defs.h" - * #undef KBASE_TRACE_CODE_MAKE_CODE - * } kbase_trace_code; - * - * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE - * - * - * The use of the macro here is: - * - KBASE_TRACE_CODE_MAKE_CODE( X ) - * - * Which produces: - * - For an enum, KBASE_TRACE_CODE_X - * - For a string, "X" - * - * - * For example: - * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: - * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum - * - "JM_JOB_COMPLETE" for the string - * - To use it to trace an event, do: - * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); - */ - -#if 0 /* Dummy section to avoid breaking formatting */ -int dummy_array[] = { -#endif - -/* - * Core events - */ - /* no info_val, no gpu_addr, no atom */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), - /* no info_val, no gpu_addr, no atom */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), - /* info_val == GPU_IRQ_STATUS register */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), - /* info_val == bits cleared */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), - /* info_val == GPU_IRQ_STATUS register */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), - /* GPU addr==dump address */ - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), - KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), -/* - * Job Slot management events - */ - /* info_val==irq rawstat at start */ - KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ), - /* info_val==jobs processed */ - KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END), -/* In the following: - * - * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some soft-stop cases) - * - uatom==kernel-side mapped uatom address (for correlation with user-side) - */ - /* info_val==exit code; gpu_addr==chain gpuaddr */ - KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE), - /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT), - /* gpu_addr is as follows: - * - If JS_STATUS active after soft-stop, val==gpu addr written to - * JS_HEAD on submit - * - otherwise gpu_addr==0 */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP), - KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), - KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP), - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), - /* gpu_addr==JS_TAIL read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), -/* gpu_addr is as follows: - * - If JS_STATUS active before soft-stop, val==JS_HEAD - * - otherwise gpu_addr==0 - */ - /* gpu_addr==JS_HEAD read */ - KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), - KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), - KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), - /* info_val == is_scheduled */ - KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), - /* info_val == is_scheduled */ - KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), - KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE), - /* info_val == nr jobs submitted */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), - /* gpu_addr==JS_HEAD_NEXT last written */ - KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), - KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), - KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), - KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), -/* - * Job dispatch events - */ - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), - /* gpu_addr==0, info_val==0, uatom==0 */ - KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), -/* - * Scheduler Core events - */ - KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB), - /* gpu_addr==last value written/would be written to JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), - KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), - /* gpu_addr==value to write into JS_HEAD */ - KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), - /* info_val == lower 32 bits of rechecked affinity */ - 
KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), - /* info_val == lower 32 bits of rechecked affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), - /* info_val == lower 32 bits of affinity */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), - /* info_val == the ctx attribute now on ctx */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), - /* info_val == the ctx attribute now on runpool */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), - /* info_val == the ctx attribute now off ctx */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), - /* info_val == the ctx attribute now off runpool */ - KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), -/* - * Scheduler Policy events - */ - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), - /* info_val == whether it was evicted */ - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), - /* gpu_addr==JS_HEAD to write if the job were run */ - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), - KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), -/* - * Power Management Events - */ - KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), - KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), - KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), - /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */ - KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), - KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), - KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), - KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), - KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), - KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), - KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON), - KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF), - /* info_val == policy number, or -1 for "Already changing" */ - KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY), - KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), - /* info_val == policy number */ - KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), - /* info_val == policy number */ - KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), -/* Unused code just to make it easier to not have a comma at the end. 
- * All other codes MUST come before this */ - KBASE_TRACE_CODE_MAKE_CODE(DUMMY) - -#if 0 /* Dummy section to avoid breaking formatting */ -}; -#endif - -/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_utility.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_utility.h deleted file mode 100755 index 8d4f044376a9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_utility.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * - * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#ifndef _KBASE_UTILITY_H -#define _KBASE_UTILITY_H - -#ifndef _KBASE_H_ -#error "Don't include this file directly, use mali_kbase.h instead" -#endif - -static inline void kbase_timer_setup(struct timer_list *timer, - void (*callback)(struct timer_list *timer)) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) - setup_timer(timer, (void (*)(unsigned long)) callback, - (unsigned long) timer); -#else - timer_setup(timer, callback, 0); -#endif -} - -#ifndef WRITE_ONCE - #ifdef ASSIGN_ONCE - #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) - #else - #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) - #endif -#endif - -#ifndef READ_ONCE - #define READ_ONCE(x) ACCESS_ONCE(x) -#endif - -#endif /* _KBASE_UTILITY_H */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_vinstr.c b/drivers/gpu/drm/bifrost/midgard/mali_kbase_vinstr.c deleted file mode 100755 index 51cb3651ed9a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_vinstr.c +++ /dev/null @@ -1,989 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
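For reference, the removed mali_kbase_trace_defs.h relies on the X-macro technique its own header comment describes: including the file under different definitions of KBASE_TRACE_CODE_MAKE_CODE produces a matching enum and string table. A sketch of both halves, based on that comment (the string-array name is invented for illustration):

#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X

typedef enum {
#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
#include "mali_kbase_trace_defs.h"
#undef KBASE_TRACE_CODE_MAKE_CODE
} kbase_trace_code;

/* Same list again, stringified, so code and name stay in sync. */
static const char * const example_trace_code_strings[] = {
#define KBASE_TRACE_CODE_MAKE_CODE(X) #X
#include "mali_kbase_trace_defs.h"
#undef KBASE_TRACE_CODE_MAKE_CODE
};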
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_vinstr.h" -#include "mali_kbase_hwcnt_virtualizer.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_reader.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" -#include "mali_malisw.h" -#include "mali_kbase_debug.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Hwcnt reader API version */ -#define HWCNT_READER_API 1 - -/* The minimum allowed interval between dumps (equivalent to 10KHz) */ -#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) - -/* The maximum allowed buffers per client */ -#define MAX_BUFFER_COUNT 32 - -/** - * struct kbase_vinstr_context - IOCTL interface for userspace hardware - * counters. - * @hvirt: Hardware counter virtualizer used by vinstr. - * @metadata: Hardware counter metadata provided by virtualizer. - * @lock: Lock protecting all vinstr state. - * @suspend_count: Suspend reference count. If non-zero, timer and worker are - * prevented from being re-scheduled. - * @client_count: Number of vinstr clients. - * @clients: List of vinstr clients. - * @dump_timer: Timer that enqueues dump_work to a workqueue. - * @dump_work: Worker for performing periodic counter dumps. - */ -struct kbase_vinstr_context { - struct kbase_hwcnt_virtualizer *hvirt; - const struct kbase_hwcnt_metadata *metadata; - struct mutex lock; - size_t suspend_count; - size_t client_count; - struct list_head clients; - struct hrtimer dump_timer; - struct work_struct dump_work; -}; - -/** - * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. - * @vctx: Vinstr context client is attached to. - * @hvcli: Hardware counter virtualizer client. - * @node: Node used to attach this client to list in vinstr - * context. - * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic - * client. - * @next_dump_time_ns: Time in ns when this client's next periodic dump must - * occur. If 0, not a periodic client. - * @enable_map: Counters enable map. - * @dump_bufs: Array of dump buffers allocated by this client. - * @dump_bufs_meta: Metadata of dump buffers. - * @meta_idx: Index of metadata being accessed by userspace. - * @read_idx: Index of buffer read by userspace. - * @write_idx: Index of buffer being written by dump worker. - * @waitq: Client's notification queue. 
- */ -struct kbase_vinstr_client { - struct kbase_vinstr_context *vctx; - struct kbase_hwcnt_virtualizer_client *hvcli; - struct list_head node; - u64 next_dump_time_ns; - u32 dump_interval_ns; - struct kbase_hwcnt_enable_map enable_map; - struct kbase_hwcnt_dump_buffer_array dump_bufs; - struct kbase_hwcnt_reader_metadata *dump_bufs_meta; - atomic_t meta_idx; - atomic_t read_idx; - atomic_t write_idx; - wait_queue_head_t waitq; -}; - -static unsigned int kbasep_vinstr_hwcnt_reader_poll( - struct file *filp, - poll_table *wait); - -static long kbasep_vinstr_hwcnt_reader_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long arg); - -static int kbasep_vinstr_hwcnt_reader_mmap( - struct file *filp, - struct vm_area_struct *vma); - -static int kbasep_vinstr_hwcnt_reader_release( - struct inode *inode, - struct file *filp); - -/* Vinstr client file operations */ -static const struct file_operations vinstr_client_fops = { - .poll = kbasep_vinstr_hwcnt_reader_poll, - .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, - .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, - .mmap = kbasep_vinstr_hwcnt_reader_mmap, - .release = kbasep_vinstr_hwcnt_reader_release, -}; - -/** - * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. - * - * Return: Current time in nanoseconds. - */ -static u64 kbasep_vinstr_timestamp_ns(void) -{ - struct timespec ts; - - getrawmonotonic(&ts); - return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; -} - -/** - * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. - * @cur_ts_ns: Current time in nanoseconds. - * @interval: Interval between dumps in nanoseconds. - * - * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump - * time that occurs after cur_ts_ns. - */ -static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) -{ - /* Non-periodic client */ - if (interval == 0) - return 0; - - /* - * Return the next interval after the current time relative to t=0. - * This means multiple clients with the same period will synchronise, - * regardless of when they were started, allowing the worker to be - * scheduled less frequently. - */ - do_div(cur_ts_ns, interval); - return (cur_ts_ns + 1) * interval; -} - -/** - * kbasep_vinstr_client_dump() - Perform a dump for a client. - * @vcli: Non-NULL pointer to a vinstr client. - * @event_id: Event type that triggered the dump. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_client_dump( - struct kbase_vinstr_client *vcli, - enum base_hwcnt_reader_event event_id) -{ - int errcode; - u64 ts_start_ns; - u64 ts_end_ns; - unsigned int write_idx; - unsigned int read_idx; - struct kbase_hwcnt_dump_buffer *dump_buf; - struct kbase_hwcnt_reader_metadata *meta; - - WARN_ON(!vcli); - lockdep_assert_held(&vcli->vctx->lock); - - write_idx = atomic_read(&vcli->write_idx); - read_idx = atomic_read(&vcli->read_idx); - - /* Check if there is a place to copy HWC block into. */ - if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) - return -EBUSY; - write_idx %= vcli->dump_bufs.buf_cnt; - - dump_buf = &vcli->dump_bufs.bufs[write_idx]; - meta = &vcli->dump_bufs_meta[write_idx]; - - errcode = kbase_hwcnt_virtualizer_client_dump( - vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf); - if (errcode) - return errcode; - - /* Patch the dump buf headers, to hide the counters that other hwcnt - * clients are using. 
- */ - kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map); - - /* Zero all non-enabled counters (current values are undefined) */ - kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); - - meta->timestamp = ts_end_ns; - meta->event_id = event_id; - meta->buffer_idx = write_idx; - - /* Notify client. Make sure all changes to memory are visible. */ - wmb(); - atomic_inc(&vcli->write_idx); - wake_up_interruptible(&vcli->waitq); - return 0; -} - -/** - * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. - * @vcli: Non-NULL pointer to a vinstr client. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) -{ - u64 ts_start_ns; - u64 ts_end_ns; - - WARN_ON(!vcli); - lockdep_assert_held(&vcli->vctx->lock); - - /* A virtualizer dump with a NULL buffer will just clear the virtualizer - * client's buffer. - */ - return kbase_hwcnt_virtualizer_client_dump( - vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); -} - -/** - * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic - * vinstr clients, then reschedule the dump - * worker appropriately. - * @vctx: Non-NULL pointer to the vinstr context. - * - * If there are no periodic clients, then the dump worker will not be - * rescheduled. Else, the dump worker will be rescheduled for the next periodic - * client dump. - */ -static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) -{ - u64 cur_ts_ns; - u64 earliest_next_ns = U64_MAX; - struct kbase_vinstr_client *pos; - - WARN_ON(!vctx); - lockdep_assert_held(&vctx->lock); - - cur_ts_ns = kbasep_vinstr_timestamp_ns(); - - /* - * Update each client's next dump time, and find the earliest next - * dump time if any of the clients have a non-zero interval. - */ - list_for_each_entry(pos, &vctx->clients, node) { - const u64 cli_next_ns = - kbasep_vinstr_next_dump_time_ns( - cur_ts_ns, pos->dump_interval_ns); - - /* Non-zero next dump time implies a periodic client */ - if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) - earliest_next_ns = cli_next_ns; - - pos->next_dump_time_ns = cli_next_ns; - } - - /* Cancel the timer if it is already pending */ - hrtimer_cancel(&vctx->dump_timer); - - /* Start the timer if there are periodic clients and vinstr is not - * suspended. - */ - if ((earliest_next_ns != U64_MAX) && - (vctx->suspend_count == 0) && - !WARN_ON(earliest_next_ns < cur_ts_ns)) - hrtimer_start( - &vctx->dump_timer, - ns_to_ktime(earliest_next_ns - cur_ts_ns), - HRTIMER_MODE_REL); -} - -/** - * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients - * that need to be dumped, then reschedules itself. - * @work: Work structure. - */ -static void kbasep_vinstr_dump_worker(struct work_struct *work) -{ - struct kbase_vinstr_context *vctx = - container_of(work, struct kbase_vinstr_context, dump_work); - struct kbase_vinstr_client *pos; - u64 cur_time_ns; - - mutex_lock(&vctx->lock); - - cur_time_ns = kbasep_vinstr_timestamp_ns(); - - /* Dump all periodic clients whose next dump time is before the current - * time. - */ - list_for_each_entry(pos, &vctx->clients, node) { - if ((pos->next_dump_time_ns != 0) && - (pos->next_dump_time_ns < cur_time_ns)) - kbasep_vinstr_client_dump( - pos, BASE_HWCNT_READER_EVENT_PERIODIC); - } - - /* Update the next dump times of all periodic clients, then reschedule - * this worker at the earliest next dump time. 
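The periodic dumping removed here hinges on kbasep_vinstr_next_dump_time_ns() rounding the current time up to the next multiple of a client's interval, which is why clients sharing a period are serviced by a single worker run. A worked example of that arithmetic, with made-up numbers:

/*
 * cur_ts_ns = 2300000, interval = 1000000 (a 1 ms period):
 *   do_div(cur_ts_ns, interval)  ->  cur_ts_ns == 2   (whole periods elapsed)
 *   (cur_ts_ns + 1) * interval   ->  3000000          (next 1 ms boundary)
 * A second 1 ms client started at a different time still lands on the same
 * 3000000 boundary, so kbasep_vinstr_reschedule_worker() arms one timer for
 * both.
 */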
- */ - kbasep_vinstr_reschedule_worker(vctx); - - mutex_unlock(&vctx->lock); -} - -/** - * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for - * execution as soon as possible. - * @timer: Timer structure. - */ -static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) -{ - struct kbase_vinstr_context *vctx = - container_of(timer, struct kbase_vinstr_context, dump_timer); - - /* We don't need to check vctx->suspend_count here, as the suspend - * function will ensure that any worker enqueued here is immediately - * cancelled, and the worker itself won't reschedule this timer if - * suspend_count != 0. - */ -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, &vctx->dump_work); -#else - queue_work(system_highpri_wq, &vctx->dump_work); -#endif - return HRTIMER_NORESTART; -} - -/** - * kbasep_vinstr_client_destroy() - Destroy a vinstr client. - * @vcli: vinstr client. Must not be attached to a vinstr context. - */ -static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) -{ - if (!vcli) - return; - - kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); - kfree(vcli->dump_bufs_meta); - kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs); - kbase_hwcnt_enable_map_free(&vcli->enable_map); - kfree(vcli); -} - -/** - * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to - * the vinstr context. - * @vctx: Non-NULL pointer to vinstr context. - * @setup: Non-NULL pointer to hardware counter ioctl setup structure. - * setup->buffer_count must not be 0. - * @out_vcli: Non-NULL pointer to where created client will be stored on - * success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_client_create( - struct kbase_vinstr_context *vctx, - struct kbase_ioctl_hwcnt_reader_setup *setup, - struct kbase_vinstr_client **out_vcli) -{ - int errcode; - struct kbase_vinstr_client *vcli; - struct kbase_hwcnt_physical_enable_map phys_em; - - WARN_ON(!vctx); - WARN_ON(!setup); - WARN_ON(setup->buffer_count == 0); - - vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); - if (!vcli) - return -ENOMEM; - - vcli->vctx = vctx; - - errcode = kbase_hwcnt_enable_map_alloc( - vctx->metadata, &vcli->enable_map); - if (errcode) - goto error; - - phys_em.jm_bm = setup->jm_bm; - phys_em.shader_bm = setup->shader_bm; - phys_em.tiler_bm = setup->tiler_bm; - phys_em.mmu_l2_bm = setup->mmu_l2_bm; - kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); - - errcode = kbase_hwcnt_dump_buffer_array_alloc( - vctx->metadata, setup->buffer_count, &vcli->dump_bufs); - if (errcode) - goto error; - - errcode = -ENOMEM; - vcli->dump_bufs_meta = kmalloc_array( - setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); - if (!vcli->dump_bufs_meta) - goto error; - - errcode = kbase_hwcnt_virtualizer_client_create( - vctx->hvirt, &vcli->enable_map, &vcli->hvcli); - if (errcode) - goto error; - - init_waitqueue_head(&vcli->waitq); - - *out_vcli = vcli; - return 0; -error: - kbasep_vinstr_client_destroy(vcli); - return errcode; -} - -int kbase_vinstr_init( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_vinstr_context **out_vctx) -{ - struct kbase_vinstr_context *vctx; - const struct kbase_hwcnt_metadata *metadata; - - if (!hvirt || !out_vctx) - return -EINVAL; - - metadata = kbase_hwcnt_virtualizer_metadata(hvirt); - if (!metadata) - return -EINVAL; - - vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); - if (!vctx) - return -ENOMEM; - - vctx->hvirt = hvirt; - vctx->metadata = metadata; - - 
mutex_init(&vctx->lock); - INIT_LIST_HEAD(&vctx->clients); - hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vctx->dump_timer.function = kbasep_vinstr_dump_timer; - INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); - - *out_vctx = vctx; - return 0; -} - -void kbase_vinstr_term(struct kbase_vinstr_context *vctx) -{ - if (!vctx) - return; - - cancel_work_sync(&vctx->dump_work); - - /* Non-zero client count implies client leak */ - if (WARN_ON(vctx->client_count != 0)) { - struct kbase_vinstr_client *pos, *n; - - list_for_each_entry_safe(pos, n, &vctx->clients, node) { - list_del(&pos->node); - vctx->client_count--; - kbasep_vinstr_client_destroy(pos); - } - } - - WARN_ON(vctx->client_count != 0); - kfree(vctx); -} - -void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) -{ - if (WARN_ON(!vctx)) - return; - - mutex_lock(&vctx->lock); - - if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) - vctx->suspend_count++; - - mutex_unlock(&vctx->lock); - - /* Always sync cancel the timer and then the worker, regardless of the - * new suspend count. - * - * This ensures concurrent calls to kbase_vinstr_suspend() always block - * until vinstr is fully suspended. - * - * The timer is cancelled before the worker, as the timer - * unconditionally re-enqueues the worker, but the worker checks the - * suspend_count that we just incremented before rescheduling the timer. - * - * Therefore if we cancel the worker first, the timer might re-enqueue - * the worker before we cancel the timer, but the opposite is not - * possible. - */ - hrtimer_cancel(&vctx->dump_timer); - cancel_work_sync(&vctx->dump_work); -} - -void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) -{ - if (WARN_ON(!vctx)) - return; - - mutex_lock(&vctx->lock); - - if (!WARN_ON(vctx->suspend_count == 0)) { - vctx->suspend_count--; - - /* Last resume, so re-enqueue the worker if we have any periodic - * clients. - */ - if (vctx->suspend_count == 0) { - struct kbase_vinstr_client *pos; - bool has_periodic_clients = false; - - list_for_each_entry(pos, &vctx->clients, node) { - if (pos->dump_interval_ns != 0) { - has_periodic_clients = true; - break; - } - } - - if (has_periodic_clients) -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, &vctx->dump_work); -#else - queue_work(system_highpri_wq, &vctx->dump_work); -#endif - } - } - - mutex_unlock(&vctx->lock); -} - -int kbase_vinstr_hwcnt_reader_setup( - struct kbase_vinstr_context *vctx, - struct kbase_ioctl_hwcnt_reader_setup *setup) -{ - int errcode; - int fd; - struct kbase_vinstr_client *vcli = NULL; - - if (!vctx || !setup || - (setup->buffer_count == 0) || - (setup->buffer_count > MAX_BUFFER_COUNT)) - return -EINVAL; - - errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); - if (errcode) - goto error; - - errcode = anon_inode_getfd( - "[mali_vinstr_desc]", - &vinstr_client_fops, - vcli, - O_RDONLY | O_CLOEXEC); - if (errcode < 0) - goto error; - - fd = errcode; - - /* Add the new client. No need to reschedule worker, as not periodic */ - mutex_lock(&vctx->lock); - - vctx->client_count++; - list_add(&vcli->node, &vctx->clients); - - mutex_unlock(&vctx->lock); - - return fd; -error: - kbasep_vinstr_client_destroy(vcli); - return errcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready - * buffers. - * @cli: Non-NULL pointer to vinstr client. - * - * Return: Non-zero if client has at least one dumping buffer filled that was - * not notified to user yet. 
- */ -static int kbasep_vinstr_hwcnt_reader_buffer_ready( - struct kbase_vinstr_client *cli) -{ - WARN_ON(!cli); - return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_dump( - struct kbase_vinstr_client *cli) -{ - int errcode; - - mutex_lock(&cli->vctx->lock); - - errcode = kbasep_vinstr_client_dump( - cli, BASE_HWCNT_READER_EVENT_MANUAL); - - mutex_unlock(&cli->vctx->lock); - return errcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_clear( - struct kbase_vinstr_client *cli) -{ - int errcode; - - mutex_lock(&cli->vctx->lock); - - errcode = kbasep_vinstr_client_clear(cli); - - mutex_unlock(&cli->vctx->lock); - return errcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @buffer: Non-NULL pointer to userspace buffer. - * @size: Size of buffer. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - struct kbase_vinstr_client *cli, - void __user *buffer, - size_t size) -{ - unsigned int meta_idx = atomic_read(&cli->meta_idx); - unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; - - struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; - - /* Metadata sanity check. */ - WARN_ON(idx != meta->buffer_idx); - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - - /* Check if there is any buffer available. */ - if (atomic_read(&cli->write_idx) == meta_idx) - return -EAGAIN; - - /* Check if previously taken buffer was put back. */ - if (atomic_read(&cli->read_idx) != meta_idx) - return -EBUSY; - - /* Copy next available buffer's metadata to user. */ - if (copy_to_user(buffer, meta, size)) - return -EFAULT; - - atomic_inc(&cli->meta_idx); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @buffer: Non-NULL pointer to userspace buffer. - * @size: Size of buffer. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - struct kbase_vinstr_client *cli, - void __user *buffer, - size_t size) -{ - unsigned int read_idx = atomic_read(&cli->read_idx); - unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; - - struct kbase_hwcnt_reader_metadata meta; - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - - /* Check if any buffer was taken. */ - if (atomic_read(&cli->meta_idx) == read_idx) - return -EPERM; - - /* Check if correct buffer is put back. */ - if (copy_from_user(&meta, buffer, size)) - return -EFAULT; - if (idx != meta.buffer_idx) - return -EINVAL; - - atomic_inc(&cli->read_idx); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @interval: Periodic dumping interval (disable periodic dumping if 0). - * - * Return: 0 always. 
- */ -static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - struct kbase_vinstr_client *cli, - u32 interval) -{ - mutex_lock(&cli->vctx->lock); - - if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) - interval = DUMP_INTERVAL_MIN_NS; - /* Update the interval, and put in a dummy next dump time */ - cli->dump_interval_ns = interval; - cli->next_dump_time_ns = 0; - - /* - * If it's a periodic client, kick off the worker early to do a proper - * timer reschedule. Return value is ignored, as we don't care if the - * worker is already queued. - */ - if ((interval != 0) && (cli->vctx->suspend_count == 0)) -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, &cli->vctx->dump_work); -#else - queue_work(system_highpri_wq, &cli->vctx->dump_work); -#endif - - mutex_unlock(&cli->vctx->lock); - - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @event_id: ID of event to enable. - * - * Return: 0 always. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - /* No-op, as events aren't supported */ - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl - * command. - * @cli: Non-NULL pointer to vinstr client. - * @event_id: ID of event to disable. - * - * Return: 0 always. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - struct kbase_vinstr_client *cli, - enum base_hwcnt_reader_event event_id) -{ - /* No-op, as events aren't supported */ - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. - * @cli: Non-NULL pointer to vinstr client. - * @hwver: Non-NULL pointer to user buffer where HW version will be stored. - * - * Return: 0 on success, else error code. - */ -static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - struct kbase_vinstr_client *cli, - u32 __user *hwver) -{ - u32 ver = 0; - const enum kbase_hwcnt_gpu_group_type type = - kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); - - switch (type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - ver = 4; - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - ver = 5; - break; - default: - WARN_ON(true); - } - - if (ver != 0) { - return put_user(ver, hwver); - } else { - return -EINVAL; - } -} - -/** - * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. - * @filp: Non-NULL pointer to file structure. - * @cmd: User command. - * @arg: Command's argument. - * - * Return: 0 on success, else error code. 
- */ -static long kbasep_vinstr_hwcnt_reader_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long arg) -{ - long rcode; - struct kbase_vinstr_client *cli; - - if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) - return -EINVAL; - - cli = filp->private_data; - if (!cli) - return -EINVAL; - - switch (cmd) { - case KBASE_HWCNT_READER_GET_API_VERSION: - rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); - break; - case KBASE_HWCNT_READER_GET_HWVER: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( - cli, (u32 __user *)arg); - break; - case KBASE_HWCNT_READER_GET_BUFFER_SIZE: - rcode = put_user( - (u32)cli->vctx->metadata->dump_buf_bytes, - (u32 __user *)arg); - break; - case KBASE_HWCNT_READER_DUMP: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); - break; - case KBASE_HWCNT_READER_CLEAR: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); - break; - case KBASE_HWCNT_READER_GET_BUFFER: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); - break; - case KBASE_HWCNT_READER_PUT_BUFFER: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( - cli, (void __user *)arg, _IOC_SIZE(cmd)); - break; - case KBASE_HWCNT_READER_SET_INTERVAL: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( - cli, (u32)arg); - break; - case KBASE_HWCNT_READER_ENABLE_EVENT: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( - cli, (enum base_hwcnt_reader_event)arg); - break; - case KBASE_HWCNT_READER_DISABLE_EVENT: - rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( - cli, (enum base_hwcnt_reader_event)arg); - break; - default: - WARN_ON(true); - rcode = -EINVAL; - break; - } - - return rcode; -} - -/** - * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. - * @filp: Non-NULL pointer to file structure. - * @wait: Non-NULL pointer to poll table. - * - * Return: POLLIN if data can be read without blocking, 0 if data can not be - * read without blocking, else error code. - */ -static unsigned int kbasep_vinstr_hwcnt_reader_poll( - struct file *filp, - poll_table *wait) -{ - struct kbase_vinstr_client *cli; - - if (!filp || !wait) - return -EINVAL; - - cli = filp->private_data; - if (!cli) - return -EINVAL; - - poll_wait(filp, &cli->waitq, wait); - if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) - return POLLIN; - return 0; -} - -/** - * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. - * @filp: Non-NULL pointer to file structure. - * @vma: Non-NULL pointer to vma structure. - * - * Return: 0 on success, else error code. - */ -static int kbasep_vinstr_hwcnt_reader_mmap( - struct file *filp, - struct vm_area_struct *vma) -{ - struct kbase_vinstr_client *cli; - unsigned long vm_size, size, addr, pfn, offset; - - if (!filp || !vma) - return -EINVAL; - - cli = filp->private_data; - if (!cli) - return -EINVAL; - - vm_size = vma->vm_end - vma->vm_start; - size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; - - if (vma->vm_pgoff > (size >> PAGE_SHIFT)) - return -EINVAL; - - offset = vma->vm_pgoff << PAGE_SHIFT; - if (vm_size > size - offset) - return -EINVAL; - - addr = __pa(cli->dump_bufs.page_addr + offset); - pfn = addr >> PAGE_SHIFT; - - return remap_pfn_range( - vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); -} - -/** - * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. - * @inode: Non-NULL pointer to inode structure. - * @filp: Non-NULL pointer to file structure. - * - * Return: 0 always. 
- */ -static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, - struct file *filp) -{ - struct kbase_vinstr_client *vcli = filp->private_data; - - mutex_lock(&vcli->vctx->lock); - - vcli->vctx->client_count--; - list_del(&vcli->node); - - mutex_unlock(&vcli->vctx->lock); - - kbasep_vinstr_client_destroy(vcli); - - return 0; -} diff --git a/drivers/gpu/drm/bifrost/midgard/mali_kbase_vinstr.h b/drivers/gpu/drm/bifrost/midgard/mali_kbase_vinstr.h deleted file mode 100755 index 81d315f95567..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_kbase_vinstr.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* - * Vinstr, used to provide an ioctl for userspace access to periodic hardware - * counters. - */ - -#ifndef _KBASE_VINSTR_H_ -#define _KBASE_VINSTR_H_ - -struct kbase_vinstr_context; -struct kbase_hwcnt_virtualizer; -struct kbase_ioctl_hwcnt_reader_setup; - -/** - * kbase_vinstr_init() - Initialise a vinstr context. - * @hvirt: Non-NULL pointer to the hardware counter virtualizer. - * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr - * context will be stored on success. - * - * On creation, the suspend count of the context will be 0. - * - * Return: 0 on success, else error code. - */ -int kbase_vinstr_init( - struct kbase_hwcnt_virtualizer *hvirt, - struct kbase_vinstr_context **out_vctx); - -/** - * kbase_vinstr_term() - Terminate a vinstr context. - * @vctx: Pointer to the vinstr context to be terminated. - */ -void kbase_vinstr_term(struct kbase_vinstr_context *vctx); - -/** - * kbase_vinstr_suspend() - Increment the suspend count of the context. - * @vctx: Non-NULL pointer to the vinstr context to be suspended. - * - * After this function call returns, it is guaranteed that all timers and - * workers in vinstr will be cancelled, and will not be re-triggered until - * after the context has been resumed. In effect, this means no new counter - * dumps will occur for any existing or subsequently added periodic clients. - */ -void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); - -/** - * kbase_vinstr_resume() - Decrement the suspend count of the context. - * @vctx: Non-NULL pointer to the vinstr context to be resumed. - * - * If a call to this function decrements the suspend count from 1 to 0, then - * normal operation of vinstr will be resumed (i.e. counter dumps will once - * again be automatically triggered for all periodic clients). - * - * It is only valid to call this function one time for each prior returned call - * to kbase_vinstr_suspend. - */ -void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); - -/** - * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader - * client. 
- * @vinstr_ctx: Non-NULL pointer to the vinstr context. - * @setup: Non-NULL pointer to the hwcnt reader configuration. - * - * Return: file descriptor on success, else a (negative) error code. - */ -int kbase_vinstr_hwcnt_reader_setup( - struct kbase_vinstr_context *vinstr_ctx, - struct kbase_ioctl_hwcnt_reader_setup *setup); - -#endif /* _KBASE_VINSTR_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_linux_kbase_trace.h b/drivers/gpu/drm/bifrost/midgard/mali_linux_kbase_trace.h deleted file mode 100755 index 6c6a8c6a5b43..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_linux_kbase_trace.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MALI_KBASE_H - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mali - -#include - -DECLARE_EVENT_CLASS(mali_slot_template, - TP_PROTO(int jobslot, unsigned int info_val), - TP_ARGS(jobslot, info_val), - TP_STRUCT__entry( - __field(unsigned int, jobslot) - __field(unsigned int, info_val) - ), - TP_fast_assign( - __entry->jobslot = jobslot; - __entry->info_val = info_val; - ), - TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val) -); - -#define DEFINE_MALI_SLOT_EVENT(name) \ -DEFINE_EVENT(mali_slot_template, mali_##name, \ - TP_PROTO(int jobslot, unsigned int info_val), \ - TP_ARGS(jobslot, info_val)) -DEFINE_MALI_SLOT_EVENT(JM_SUBMIT); -DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE); -DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD); -DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD); -DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP); -DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0); -DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1); -DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP); -DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0); -DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1); -DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); -DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT); -DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER); -DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); -DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT); -DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); -DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); -DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); -DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); -DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); -DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); -#undef DEFINE_MALI_SLOT_EVENT - -DECLARE_EVENT_CLASS(mali_refcount_template, - TP_PROTO(int refcount, unsigned int info_val), - 
TP_ARGS(refcount, info_val), - TP_STRUCT__entry( - __field(unsigned int, refcount) - __field(unsigned int, info_val) - ), - TP_fast_assign( - __entry->refcount = refcount; - __entry->info_val = info_val; - ), - TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val) -); - -#define DEFINE_MALI_REFCOUNT_EVENT(name) \ -DEFINE_EVENT(mali_refcount_template, mali_##name, \ - TP_PROTO(int refcount, unsigned int info_val), \ - TP_ARGS(refcount, info_val)) -DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK); -DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB); -DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB); -DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); -DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); -DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE); -DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE); -#undef DEFINE_MALI_REFCOUNT_EVENT - -DECLARE_EVENT_CLASS(mali_add_template, - TP_PROTO(int gpu_addr, unsigned int info_val), - TP_ARGS(gpu_addr, info_val), - TP_STRUCT__entry( - __field(unsigned int, gpu_addr) - __field(unsigned int, info_val) - ), - TP_fast_assign( - __entry->gpu_addr = gpu_addr; - __entry->info_val = info_val; - ), - TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val) -); - -#define DEFINE_MALI_ADD_EVENT(name) \ -DEFINE_EVENT(mali_add_template, mali_##name, \ - TP_PROTO(int gpu_addr, unsigned int info_val), \ - TP_ARGS(gpu_addr, info_val)) -DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); -DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); -DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); -DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); -DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); -DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); -DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); -DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); -DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); -DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); -DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER); -DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END); -DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER); -DEFINE_MALI_ADD_EVENT(JD_DONE); -DEFINE_MALI_ADD_EVENT(JD_CANCEL); -DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT); -DEFINE_MALI_ADD_EVENT(JM_IRQ); -DEFINE_MALI_ADD_EVENT(JM_IRQ_END); -DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS); -DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE); -DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED); -DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED); -DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE); -DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET); -DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); -DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); -DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END); -DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START); -DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); -DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); -DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); -DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); -DEFINE_MALI_ADD_EVENT(PM_PWRON); -DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); -DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); 
-DEFINE_MALI_ADD_EVENT(PM_PWROFF); -DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); -DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); -DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); -DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); -DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); -DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); -DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); -DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); -DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); -DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); -DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); -DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); -DEFINE_MALI_ADD_EVENT(PM_GPU_ON); -DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); -DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); -DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); -DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); -DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); -DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); -#undef DEFINE_MALI_ADD_EVENT - -#endif /* _TRACE_MALI_KBASE_H */ - -#undef TRACE_INCLUDE_PATH -#undef linux -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mali_linux_kbase_trace - -/* This part must be outside protection */ -#include diff --git a/drivers/gpu/drm/bifrost/midgard/mali_linux_trace.h b/drivers/gpu/drm/bifrost/midgard/mali_linux_trace.h deleted file mode 100755 index 0741dfcab575..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_linux_trace.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_MALI_H - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mali -#define TRACE_INCLUDE_FILE mali_linux_trace - -#include - -#define MALI_JOB_SLOTS_EVENT_CHANGED - -/** - * mali_job_slots_event - called from mali_kbase_core_linux.c - * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro. 
- */ -TRACE_EVENT(mali_job_slots_event, - TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, - unsigned char job_id), - TP_ARGS(event_id, tgid, pid, job_id), - TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned int, tgid) - __field(unsigned int, pid) - __field(unsigned char, job_id) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->tgid = tgid; - __entry->pid = pid; - __entry->job_id = job_id; - ), - TP_printk("event=%u tgid=%u pid=%u job_id=%u", - __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id) -); - -/** - * mali_pm_status - Called by mali_kbase_pm_driver.c - * @event_id: core type (shader, tiler, l2 cache) - * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF) - */ -TRACE_EVENT(mali_pm_status, - TP_PROTO(unsigned int event_id, unsigned long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %u = %llu", __entry->event_id, __entry->value) -); - -/** - * mali_pm_power_on - Called by mali_kbase_pm_driver.c - * @event_id: core type (shader, tiler, l2 cache) - * @value: 64bits bitmask reporting the cores to power up - */ -TRACE_EVENT(mali_pm_power_on, - TP_PROTO(unsigned int event_id, unsigned long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %u = %llu", __entry->event_id, __entry->value) -); - -/** - * mali_pm_power_off - Called by mali_kbase_pm_driver.c - * @event_id: core type (shader, tiler, l2 cache) - * @value: 64bits bitmask reporting the cores to power down - */ -TRACE_EVENT(mali_pm_power_off, - TP_PROTO(unsigned int event_id, unsigned long long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(unsigned int, event_id) - __field(unsigned long long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %u = %llu", __entry->event_id, __entry->value) -); - -/** - * mali_page_fault_insert_pages - Called by page_fault_worker() - * it reports an MMU page fault resulting in new pages being mapped. - * @event_id: MMU address space number. - * @value: number of newly allocated pages - */ -TRACE_EVENT(mali_page_fault_insert_pages, - TP_PROTO(int event_id, unsigned long value), - TP_ARGS(event_id, value), - TP_STRUCT__entry( - __field(int, event_id) - __field(unsigned long, value) - ), - TP_fast_assign( - __entry->event_id = event_id; - __entry->value = value; - ), - TP_printk("event %d = %lu", __entry->event_id, __entry->value) -); - -/** - * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space() - * it reports that a certain MMU address space is in use now. - * @event_id: MMU address space number. - */ -TRACE_EVENT(mali_mmu_as_in_use, - TP_PROTO(int event_id), - TP_ARGS(event_id), - TP_STRUCT__entry( - __field(int, event_id) - ), - TP_fast_assign( - __entry->event_id = event_id; - ), - TP_printk("event=%d", __entry->event_id) -); - -/** - * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal() - * it reports that a certain MMU address space has been released now. - * @event_id: MMU address space number. 
- */ -TRACE_EVENT(mali_mmu_as_released, - TP_PROTO(int event_id), - TP_ARGS(event_id), - TP_STRUCT__entry( - __field(int, event_id) - ), - TP_fast_assign( - __entry->event_id = event_id; - ), - TP_printk("event=%d", __entry->event_id) -); - -/** - * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages() - * and by kbase_atomic_sub_pages() - * it reports that the total number of allocated pages is changed. - * @event_id: number of pages to be added or subtracted (according to the sign). - */ -TRACE_EVENT(mali_total_alloc_pages_change, - TP_PROTO(long long int event_id), - TP_ARGS(event_id), - TP_STRUCT__entry( - __field(long long int, event_id) - ), - TP_fast_assign( - __entry->event_id = event_id; - ), - TP_printk("event=%lld", __entry->event_id) -); - -#endif /* _TRACE_MALI_H */ - -#undef TRACE_INCLUDE_PATH -#undef linux -#define TRACE_INCLUDE_PATH . - -/* This part must be outside protection */ -#include diff --git a/drivers/gpu/drm/bifrost/midgard/mali_malisw.h b/drivers/gpu/drm/bifrost/midgard/mali_malisw.h deleted file mode 100755 index 3a4db10bdb3d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_malisw.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Kernel-wide include for common macros and types. - */ - -#ifndef _MALISW_H_ -#define _MALISW_H_ - -#include -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) -#define U8_MAX ((u8)~0U) -#define S8_MAX ((s8)(U8_MAX>>1)) -#define S8_MIN ((s8)(-S8_MAX - 1)) -#define U16_MAX ((u16)~0U) -#define S16_MAX ((s16)(U16_MAX>>1)) -#define S16_MIN ((s16)(-S16_MAX - 1)) -#define U32_MAX ((u32)~0U) -#define S32_MAX ((s32)(U32_MAX>>1)) -#define S32_MIN ((s32)(-S32_MAX - 1)) -#define U64_MAX ((u64)~0ULL) -#define S64_MAX ((s64)(U64_MAX>>1)) -#define S64_MIN ((s64)(-S64_MAX - 1)) -#endif /* LINUX_VERSION_CODE */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) -#define SIZE_MAX (~(size_t)0) -#endif /* LINUX_VERSION_CODE */ - -/** - * MIN - Return the lesser of two values. - * - * As a macro it may evaluate its arguments more than once. - * Refer to MAX macro for more details - */ -#define MIN(x, y) ((x) < (y) ? (x) : (y)) - -/** - * MAX - Return the greater of two values. - * - * As a macro it may evaluate its arguments more than once. - * If called on the same two arguments as MIN it is guaranteed to return - * the one that MIN didn't return. This is significant for types where not - * all values are comparable e.g. NaNs in floating-point types. But if you want - * to retrieve the min and max of two values, consider using a conditional swap - * instead. - */ -#define MAX(x, y) ((x) < (y) ? (y) : (x)) - -/** - * @hideinitializer - * Function-like macro for suppressing unused variable warnings. 
Where possible - * such variables should be removed; this macro is present for cases where we - * much support API backwards compatibility. - */ -#define CSTD_UNUSED(x) ((void)(x)) - -/** - * @hideinitializer - * Function-like macro for use where "no behavior" is desired. This is useful - * when compile time macros turn a function-like macro in to a no-op, but - * where having no statement is otherwise invalid. - */ -#define CSTD_NOP(...) ((void)#__VA_ARGS__) - -/** - * @hideinitializer - * Function-like macro for stringizing a single level macro. - * @code - * #define MY_MACRO 32 - * CSTD_STR1( MY_MACRO ) - * > "MY_MACRO" - * @endcode - */ -#define CSTD_STR1(x) #x - -/** - * @hideinitializer - * Function-like macro for stringizing a macro's value. This should not be used - * if the macro is defined in a way which may have no value; use the - * alternative @c CSTD_STR2N macro should be used instead. - * @code - * #define MY_MACRO 32 - * CSTD_STR2( MY_MACRO ) - * > "32" - * @endcode - */ -#define CSTD_STR2(x) CSTD_STR1(x) - -#endif /* _MALISW_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_midg_coherency.h b/drivers/gpu/drm/bifrost/midgard/mali_midg_coherency.h deleted file mode 100755 index 29d5df38c92b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_midg_coherency.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _MIDG_COHERENCY_H_ -#define _MIDG_COHERENCY_H_ - -#define COHERENCY_ACE_LITE 0 -#define COHERENCY_ACE 1 -#define COHERENCY_NONE 31 -#define COHERENCY_FEATURE_BIT(x) (1 << (x)) - -#endif /* _MIDG_COHERENCY_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_midg_regmap.h b/drivers/gpu/drm/bifrost/midgard/mali_midg_regmap.h deleted file mode 100755 index 0f03e8d216ab..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_midg_regmap.h +++ /dev/null @@ -1,643 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _MIDGARD_REGMAP_H_ -#define _MIDGARD_REGMAP_H_ - -#include "mali_midg_coherency.h" -#include "mali_kbase_gpu_id.h" - -/* - * Begin Register Offsets - */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ -#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ -#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -#define MMU_FEATURES 0x014 /* (RO) MMU features */ -#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -#define JS_PRESENT 0x01C /* (RO) Job slots present */ -#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_MASK 0x028 /* (RW) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended to use with SOFT_RESET - commands which may take time. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down - and the power manager is idle. */ - -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ - -#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ - | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) - -#define GPU_COMMAND 0x030 /* (WO) */ -#define GPU_STATUS 0x034 /* (RO) */ -#define LATEST_FLUSH 0x038 /* (RO) */ - -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ - -#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ - -#define PWR_KEY 0x050 /* (WO) Power manager key register */ -#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */ - -#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ - -#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup 
size */ -#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that - * TLS must be allocated for - */ - -#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ - -#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) - -#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ -#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ -#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ -#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ -#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ -#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ -#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ -#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ -#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ -#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ -#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ -#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ -#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ -#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ -#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ -#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ - -#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) - -#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ - -#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ - -#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ - -#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ - - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ -#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ - - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -#define 
STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ -#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ - - -#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ - -#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ - -#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ - -#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ - - -#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ - -#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ - -#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ -#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ - -#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ -#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ - - -#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ - -#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ - -#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ - -#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ - -#define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ -#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ -#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ -#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ -#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ -#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ - -/* JOB IRQ flags */ -#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ - -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ -#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ -#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ -#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ -#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ -#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ -#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ -#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ -#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ -#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ -#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ -#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ -#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ -#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ -#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ -#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ - -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - -#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ -#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job - slot n */ - -#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ - -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ - -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for - job slot n */ - -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ - -#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 -#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ -#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ -#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ 
-#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C - -/* End Register Offsets */ - -/* - * MMU_IRQ_RAWSTAT register values. Values are valid also for - MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
- */ - -#define MMU_PAGE_FAULT_FLAGS 16 - -/* Macros returning a bitmask to retrieve page fault or bus error flags from - * MMU registers */ -#define MMU_PAGE_FAULT(n) (1UL << (n)) -#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) - -/* - * Begin LPAE MMU TRANSTAB register values - */ -#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 -#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) -#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) -#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) -#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) -#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) - -#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 - -/* - * Begin AARCH64 MMU TRANSTAB register values - */ -#define MMU_HW_OUTA_BITS 40 -#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) - -/* - * Begin MMU STATUS register values - */ -#define AS_STATUS_AS_ACTIVE 0x01 - -#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) - -#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) - -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) - -/* - * Begin MMU TRANSCFG register values - */ - -#define AS_TRANSCFG_ADRMODE_LEGACY 0 -#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 - -#define AS_TRANSCFG_ADRMODE_MASK 0xF - - -/* - * Begin TRANSCFG register values - */ -#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) - -#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) -#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) -#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) -#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) -/* - * Begin Command Values - */ - -/* JS_COMMAND register commands */ -#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ -#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ - -#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ - -/* AS_COMMAND register commands */ -#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs - (deprecated - only for use with T60x) */ -#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ -#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then - flush all L2 caches then issue a flush region command to all MMUs */ - -/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ -#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) -#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) -#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) -#define JS_CONFIG_START_MMU (1u << 10) -#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) -#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION -#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) -#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) -#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) -#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) -#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) - -/* JS_XAFFINITY register values */ -#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) -#define JS_XAFFINITY_TILER_ENABLE (1u << 8) -#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) - -/* JS_STATUS register values */ - -/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. - * The values are separated to avoid dependency of userspace and kernel code. 
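A little further up, the MMU_IRQ_* helpers pack one page-fault bit and one bus-error bit per address space into a single 32-bit status word: page faults occupy bits 0..15 and bus errors bits 16..31. A self-contained sketch of walking that bitmap (plain C; the printf reporting is only illustrative, a real handler would dispatch to per-AS fault handling instead):

#include <stdio.h>

#define MMU_PAGE_FAULT_FLAGS 16
#define MMU_PAGE_FAULT(n) (1UL << (n))
#define MMU_BUS_ERROR(n)  (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))

static void report_mmu_irq(unsigned long rawstat)
{
        unsigned int as;

        for (as = 0; as < MMU_PAGE_FAULT_FLAGS; as++) {
                if (rawstat & MMU_PAGE_FAULT(as))
                        printf("AS%u: page fault\n", as);
                if (rawstat & MMU_BUS_ERROR(as))
                        printf("AS%u: bus error\n", as);
        }
}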
- */ - -/* Group of values representing the job status insead a particular fault */ -#define JS_STATUS_NO_EXCEPTION_BASE 0x00 -#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ -#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ -#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ - -/* General fault values */ -#define JS_STATUS_FAULT_BASE 0x40 -#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ -#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ -#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ -#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ -#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ -#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ - -/* Instruction or data faults */ -#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 -#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ -#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ -#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ -#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ -#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ -#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ -#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ -/* NOTE: No fault with 0x57 code defined in spec. */ -#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ -#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ -#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ - -/* Other faults */ -#define JS_STATUS_MEMORY_FAULT_BASE 0x60 -#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ -#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ - -/* GPU_COMMAND values */ -#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ -#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ -#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ -#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. 
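The job-slot status codes listed above fall into contiguous ranges keyed off the *_BASE values, so a coarse classifier needs nothing more than comparisons. A sketch, with the class strings being illustrative labels rather than names taken from the driver:

static const char *js_status_class(unsigned int status)
{
        if (status < 0x40)      /* below JS_STATUS_FAULT_BASE */
                return "not a fault (interrupted/stopped/terminated)";
        if (status < 0x50)      /* JS_STATUS_FAULT_BASE range */
                return "general fault";
        if (status < 0x60)      /* JS_STATUS_INSTRUCTION_FAULT_BASE range */
                return "instruction or data fault";
        if (status == 0x7F)     /* JS_STATUS_UNKNOWN */
                return "unknown";
        return "memory fault";  /* JS_STATUS_MEMORY_FAULT_BASE and up */
}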
*/ -#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ -#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ -#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ -#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ -#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ - -/* End Command Values */ - -/* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - -/* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ - -#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ -#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ -#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ - -/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_WRITE_ALLOC 0x8Dull - -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull -/* Set to inner non-cacheable, outer-non-cacheable - * Setting defined by the alloc bits is ignored, but set to a valid encoding: - * - no-alloc on read - * - no alloc on write - */ -#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull - -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -/* There is no LPAE support for non-cacheable, since the memory type is always - * write-back. 
- * Marking this setting as reserved for LPAE - */ -#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED - -/* Symbols for default MEMATTR to use - * Default is - HW implementation defined caching */ -#define AS_MEMATTR_INDEX_DEFAULT 0 -#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 - -/* HW implementation defined caching */ -#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 -/* Force cache on */ -#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 -/* Write-alloc */ -#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 -/* Outer coherent, inner implementation defined policy */ -#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 -/* Outer coherent, write alloc inner */ -#define AS_MEMATTR_INDEX_OUTER_WA 4 -/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ -#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 - -/* JS_FEATURES register */ - -#define JS_FEATURE_NULL_JOB (1u << 1) -#define JS_FEATURE_SET_VALUE_JOB (1u << 2) -#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) -#define JS_FEATURE_COMPUTE_JOB (1u << 4) -#define JS_FEATURE_VERTEX_JOB (1u << 5) -#define JS_FEATURE_GEOMETRY_JOB (1u << 6) -#define JS_FEATURE_TILER_JOB (1u << 7) -#define JS_FEATURE_FUSED_JOB (1u << 8) -#define JS_FEATURE_FRAGMENT_JOB (1u << 9) - -/* End JS_FEATURES register */ - -/* L2_MMU_CONFIG register */ -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) - -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) -#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) - -#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT (12) -#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) - -#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT (15) -#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT) - -/* End L2_MMU_CONFIG register */ - -/* THREAD_* registers */ - -/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -#define IMPLEMENTATION_UNSPECIFIED 0 -#define IMPLEMENTATION_SILICON 1 -#define IMPLEMENTATION_FPGA 2 -#define IMPLEMENTATION_MODEL 3 - -/* Default values when registers are not supported by the implemented hardware */ -#define THREAD_MT_DEFAULT 256 -#define THREAD_MWS_DEFAULT 256 -#define THREAD_MBS_DEFAULT 256 -#define THREAD_MR_DEFAULT 1024 -#define THREAD_MTQ_DEFAULT 4 -#define THREAD_MTGS_DEFAULT 10 - -/* End THREAD_* registers */ - -/* SHADER_CONFIG register */ - -#define SC_ALT_COUNTERS (1ul << 3) -#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4) -#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6) -#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16) -#define SC_TLS_HASH_ENABLE (1ul << 17) -#define SC_LS_ATTR_CHECK_DISABLE 
(1ul << 18) -#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25) -/* End SHADER_CONFIG register */ - -/* TILER_CONFIG register */ - -#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) - -/* End TILER_CONFIG register */ - -/* JM_CONFIG register */ - -#define JM_TIMESTAMP_OVERRIDE (1ul << 0) -#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) -#define JM_JOB_THROTTLE_ENABLE (1ul << 2) -#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) -#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) -#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) -#define JM_IDVS_GROUP_SIZE_SHIFT (16) -#define JM_MAX_IDVS_GROUP_SIZE (0x3F) -/* End JM_CONFIG register */ - - -#endif /* _MIDGARD_REGMAP_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/mali_uk.h b/drivers/gpu/drm/bifrost/midgard/mali_uk.h deleted file mode 100755 index 701f3909042f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/mali_uk.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_uk.h - * Types and definitions that are common across OSs for both the user - * and kernel side of the User-Kernel interface. - */ - -#ifndef _UK_H_ -#define _UK_H_ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/** - * @addtogroup base_api - * @{ - */ - -/** - * @defgroup uk_api User-Kernel Interface API - * - * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device - * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. - * - * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent - * kernel-side API (UKK) via an OS-specific communication mechanism. - * - * This API is internal to the Midgard DDK and is not exposed to any applications. - * - * @{ - */ - -/** - * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The - * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this - * identifier to select a UKK client to the uku_open() function. - * - * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id - * enumeration and the uku_open() implemenation for the various OS ports need to be updated to - * provide a mapping of the identifier to the OS specific device name. - * - */ -enum uk_client_id { - /** - * Value used to identify the Base driver UK client. - */ - UK_CLIENT_MALI_T600_BASE, - - /** The number of uk clients supported. 
This must be the last member of the enum */ - UK_CLIENT_COUNT -}; - -/** @} end group uk_api */ - -/** @} *//* end group base_api */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* _UK_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/platform/Kconfig b/drivers/gpu/drm/bifrost/midgard/platform/Kconfig deleted file mode 100755 index ef9fb963ecf5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/Kconfig +++ /dev/null @@ -1,30 +0,0 @@ -# -# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - - -# Add your platform specific Kconfig file here -# -# "drivers/gpu/arm/midgard/platform/xxx/Kconfig" -# -# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME -# - diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/Kbuild b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/Kbuild deleted file mode 100755 index 0a82eafb5b51..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/Kbuild +++ /dev/null @@ -1,39 +0,0 @@ -# -# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -USE_GPPLL?=0 -ifdef CONFIG_AM_VIDEO - USE_GPPLL:=1 -endif - -ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL) - -mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ - $(MALI_PLATFORM_DIR)/mali_clock.o \ - $(MALI_PLATFORM_DIR)/mpgpu.o \ - $(MALI_PLATFORM_DIR)/meson_main2.o \ - $(MALI_PLATFORM_DIR)/platform_gx.o \ - $(MALI_PLATFORM_DIR)/scaling.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_clock.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_clock.c deleted file mode 100644 index d3ff22c1de41..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_clock.c +++ /dev/null @@ -1,675 +0,0 @@ -/* - * mali_clock.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include "mali_scaling.h" -#include "mali_clock.h" - -#ifndef AML_CLK_LOCK_ERROR -#define AML_CLK_LOCK_ERROR 1 -#endif - -static unsigned gpu_dbg_level = 0; -module_param(gpu_dbg_level, uint, 0644); -MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level"); - -#define gpu_dbg(level, fmt, arg...) \ - do { \ - if (gpu_dbg_level >= (level)) \ - printk("gpu_debug"fmt , ## arg); \ - } while (0) - -#define GPU_CLK_DBG(fmt, arg...) - -//disable print -//#define _dev_info(...) - -//static DEFINE_SPINLOCK(lock); -static mali_plat_info_t* pmali_plat = NULL; -//static u32 mali_extr_backup = 0; -//static u32 mali_extr_sample_backup = 0; -struct timeval start; -struct timeval end; -int mali_pm_statue = 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)) -int mali_clock_init_clk_tree(struct platform_device* pdev) -{ - mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock]; - struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle; - struct clk *clk_mali_0 = pmali_plat->clk_mali_0; -#ifdef AML_CLK_LOCK_ERROR - struct clk *clk_mali_1 = pmali_plat->clk_mali_1; -#endif - struct clk *clk_mali = pmali_plat->clk_mali; - - clk_set_parent(clk_mali_0, clk_mali_0_parent); - - clk_prepare_enable(clk_mali_0); - - clk_set_parent(clk_mali, clk_mali_0); - -#ifdef AML_CLK_LOCK_ERROR - clk_set_parent(clk_mali_1, clk_mali_0_parent); - clk_prepare_enable(clk_mali_1); -#endif - - GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n", - clk_mali_0->name, clk_mali_0->enable_count, - clk_mali_0_parent->name, clk_mali_0_parent->enable_count); - - return 0; -} - -int mali_clock_init(mali_plat_info_t *pdev) -{ - *pdev = *pdev; - return 0; -} - -int mali_clock_critical(critical_t critical, size_t param) -{ - int ret = 0; - - ret = critical(param); - - return ret; -} - -static int critical_clock_set(size_t param) -{ - int ret = 0; - unsigned int idx = param; - mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx]; - - struct clk *clk_mali_0 = pmali_plat->clk_mali_0; - struct clk *clk_mali_1 = pmali_plat->clk_mali_1; - struct clk *clk_mali_x = NULL; - struct clk *clk_mali_x_parent = NULL; - struct clk *clk_mali_x_old = NULL; - struct clk *clk_mali = pmali_plat->clk_mali; - unsigned long time_use=0; - - clk_mali_x_old = clk_get_parent(clk_mali); - - if (!clk_mali_x_old) { - printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n"); - return 0; - } - if (clk_mali_x_old == clk_mali_0) { - clk_mali_x = clk_mali_1; - } else if (clk_mali_x_old == clk_mali_1) { - clk_mali_x = clk_mali_0; - } else { - printk("gpu: unmatched clk_mali_x_old\n"); - return 0; - } - - GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq); - clk_mali_x_parent = dvfs_tbl->clkp_handle; - if (!clk_mali_x_parent) { - printk("gpu: could not get clk_mali_x_parent\n"); - return 0; - } - - GPU_CLK_DBG(); - ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq); - GPU_CLK_DBG(); - ret = clk_set_parent(clk_mali_x, clk_mali_x_parent); - GPU_CLK_DBG(); 
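	/*
	 * The surrounding sequence is the usual glitch-free mux switch: the
	 * idle input (clk_mali_x, whichever of clk_mali_0/clk_mali_1 is not
	 * currently feeding clk_mali) is programmed completely, parent and
	 * then rate, and clk_mali is only re-parented onto it afterwards, so
	 * the GPU never runs from a half-configured clock.
	 */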
- ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq); - GPU_CLK_DBG(); -#ifndef AML_CLK_LOCK_ERROR - ret = clk_prepare_enable(clk_mali_x); -#endif - GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name, clk_mali_x->enable_count); - do_gettimeofday(&start); - udelay(1);// delay 10ns - do_gettimeofday(&end); - ret = clk_set_parent(clk_mali, clk_mali_x); - GPU_CLK_DBG(); - -#ifndef AML_CLK_LOCK_ERROR - clk_disable_unprepare(clk_mali_x_old); -#endif - GPU_CLK_DBG("old %s:enable(%d)\n", clk_mali_x_old->name, clk_mali_x_old->enable_count); - time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec; - GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use); - - return 0; -} - -int mali_clock_set(unsigned int clock) -{ - return mali_clock_critical(critical_clock_set, (size_t)clock); -} - -void disable_clock(void) -{ - struct clk *clk_mali = pmali_plat->clk_mali; - struct clk *clk_mali_x = NULL; - - clk_mali_x = clk_get_parent(clk_mali); - GPU_CLK_DBG(); -#ifndef AML_CLK_LOCK_ERROR - clk_disable_unprepare(clk_mali_x); -#endif - GPU_CLK_DBG(); -} - -void enable_clock(void) -{ - struct clk *clk_mali = pmali_plat->clk_mali; - struct clk *clk_mali_x = NULL; - - clk_mali_x = clk_get_parent(clk_mali); - GPU_CLK_DBG(); -#ifndef AML_CLK_LOCK_ERROR - clk_prepare_enable(clk_mali_x); -#endif - GPU_CLK_DBG(); -} - -u32 get_mali_freq(u32 idx) -{ - if (!mali_pm_statue) { - return pmali_plat->clk_sample[idx]; - } else { - return 0; - } -} - -void set_str_src(u32 data) -{ - printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__); -} - -int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata) -{ - struct device_node *gpu_dn = pdev->dev.of_node; - struct device_node *gpu_clk_dn; - phandle dvfs_clk_hdl; - mali_dvfs_threshold_table *dvfs_tbl = NULL; - uint32_t *clk_sample = NULL; - - struct property *prop; - const __be32 *p; - int length = 0, i = 0; - u32 u; - int ret = 0; - if (!gpu_dn) { - dev_notice(&pdev->dev, "gpu device node not right\n"); - return -ENODEV; - } - - ret = of_property_read_u32(gpu_dn,"num_of_pp", - &mpdata->cfg_pp); - if (ret) { - dev_notice(&pdev->dev, "set max pp to default 6\n"); - mpdata->cfg_pp = 6; - } - mpdata->scale_info.maxpp = mpdata->cfg_pp; - mpdata->maxpp_sysfs = mpdata->cfg_pp; - _dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp); - - ret = of_property_read_u32(gpu_dn,"min_pp", - &mpdata->cfg_min_pp); - if (ret) { - dev_notice(&pdev->dev, "set min pp to default 1\n"); - mpdata->cfg_min_pp = 1; - } - mpdata->scale_info.minpp = mpdata->cfg_min_pp; - _dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp); - - ret = of_property_read_u32(gpu_dn,"min_clk", - &mpdata->cfg_min_clock); - if (ret) { - dev_notice(&pdev->dev, "set min clk default to 0\n"); - mpdata->cfg_min_clock = 0; - } - mpdata->scale_info.minclk = mpdata->cfg_min_clock; - _dev_info(&pdev->dev, "min clk is %d\n", mpdata->scale_info.minclk); - - mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3); - _dev_info(&pdev->dev, "hiu io source 0x%p\n", mpdata->reg_base_hiubus); - - mpdata->reg_base_aobus = of_iomap(gpu_dn, 2); - _dev_info(&pdev->dev, "ao io source 0x%p\n", mpdata->reg_base_aobus); - - ret = of_property_read_u32(gpu_dn,"sc_mpp", - &mpdata->sc_mpp); - if (ret) { - dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp); - mpdata->sc_mpp = mpdata->cfg_pp; - } - _dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp); - - of_get_property(gpu_dn, "tbl", &length); - - length = length /sizeof(u32); - 
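	/*
	 * "tbl" holds one phandle per DVFS operating point; each referenced
	 * node carries clk_freq, clk_parent, clkp_freq, voltage, keep_count
	 * and a two-cell threshold = <down up> pair, which the
	 * of_property_for_each_u32() loop below copies into dvfs_table[].
	 * of_get_property() reported the property size in bytes, hence the
	 * division by sizeof(u32) to get the number of entries.
	 */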
_dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length); - - mpdata->dvfs_table = devm_kzalloc(&pdev->dev, - sizeof(struct mali_dvfs_threshold_table)*length, - GFP_KERNEL); - dvfs_tbl = mpdata->dvfs_table; - if (mpdata->dvfs_table == NULL) { - dev_err(&pdev->dev, "failed to alloc dvfs table\n"); - return -ENOMEM; - } - mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL); - if (mpdata->clk_sample == NULL) { - dev_err(&pdev->dev, "failed to alloc clk_sample table\n"); - return -ENOMEM; - } - clk_sample = mpdata->clk_sample; - mpdata->dvfs_table_size = 0; - - of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) { - dvfs_clk_hdl = (phandle) u; - gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl); - ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq); - if (ret) { - dev_notice(&pdev->dev, "read clk_freq failed\n"); - } - ret = of_property_read_string(gpu_clk_dn,"clk_parent", - &dvfs_tbl->clk_parent); - if (ret) { - dev_notice(&pdev->dev, "read clk_parent failed\n"); - } - dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent); - if (IS_ERR(dvfs_tbl->clkp_handle)) { - dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent); - } - ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq); - if (ret) { - dev_notice(&pdev->dev, "read clk_parent freq failed\n"); - } - ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage); - if (ret) { - dev_notice(&pdev->dev, "read voltage failed\n"); - } - ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count); - if (ret) { - dev_notice(&pdev->dev, "read keep_count failed\n"); - } - //downthreshold and upthreshold shall be u32 - ret = of_property_read_u32_array(gpu_clk_dn,"threshold", - &dvfs_tbl->downthreshold, 2); - if (ret) { - dev_notice(&pdev->dev, "read threshold failed\n"); - } - dvfs_tbl->freq_index = i; - - *clk_sample = dvfs_tbl->clk_freq / 1000000; - - dvfs_tbl ++; - clk_sample ++; - i++; - mpdata->dvfs_table_size ++; - } - dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size); - dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n", - mpdata->dvfs_table, - sizeof(mpdata->dvfs_table[0])); - - ret = of_property_read_u32(gpu_dn,"max_clk", - &mpdata->cfg_clock); - if (ret) { - dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2); - mpdata->cfg_clock = mpdata->dvfs_table_size-2; - } - - mpdata->cfg_clock_bkup = mpdata->cfg_clock; - mpdata->maxclk_sysfs = mpdata->cfg_clock; - mpdata->scale_info.maxclk = mpdata->cfg_clock; - _dev_info(&pdev->dev, "max clk is %d\n", mpdata->scale_info.maxclk); - - ret = of_property_read_u32(gpu_dn,"turbo_clk", - &mpdata->turbo_clock); - if (ret) { - dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1); - mpdata->turbo_clock = mpdata->dvfs_table_size-1; - } - _dev_info(&pdev->dev, "turbo clk is %d\n", mpdata->turbo_clock); - - ret = of_property_read_u32(gpu_dn,"def_clk", - &mpdata->def_clock); - if (ret) { - mpdata->def_clock = mpdata->scale_info.maxclk; - dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock); - } - if (mpdata->def_clock > mpdata->scale_info.maxclk) - mpdata->def_clock = mpdata->scale_info.maxclk; - - _dev_info(&pdev->dev, "default clk is %d\n", mpdata->def_clock); - - dvfs_tbl = mpdata->dvfs_table; - clk_sample = mpdata->clk_sample; - for (i = 0; i< mpdata->dvfs_table_size; i++) { - _dev_info(&pdev->dev, "====================%d====================\n" - "clk_freq=%10d, clk_parent=%9s, 
voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n", - i, - dvfs_tbl->clk_freq, dvfs_tbl->clk_parent, - dvfs_tbl->voltage, dvfs_tbl->keep_count, - dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample); - dvfs_tbl ++; - clk_sample ++; - } - - mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu"); - mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0"); - mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1"); - if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) { - dev_err(&pdev->dev, "failed to get clock pointer\n"); - return -EFAULT; - } - - pmali_plat = mpdata; - mpdata->pdev = pdev; - return 0; -} -#else -int mali_clock_init_clk_tree(struct platform_device* pdev) -{ - mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock]; - struct clk *clk_mali = pmali_plat->clk_mali; - - if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) && - !IS_ERR(dvfs_tbl->clkp_handle) && - (0 != dvfs_tbl->clkp_freq)) { - clk_prepare_enable(dvfs_tbl->clkp_handle); - clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq); - } - clk_prepare_enable(clk_mali); - clk_set_rate(clk_mali, dvfs_tbl->clk_freq); - - return 0; -} - -int mali_clock_init(mali_plat_info_t *pdev) -{ - *pdev = *pdev; - return 0; -} - -int mali_clock_critical(critical_t critical, size_t param) -{ - int ret = 0; - - ret = critical(param); - - return ret; -} - -static int critical_clock_set(size_t param) -{ - int ret = 0; - unsigned int idx = param; - mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx]; - - struct clk *clk_mali = pmali_plat->clk_mali; - unsigned long time_use=0; - - - GPU_CLK_DBG(); - do_gettimeofday(&start); - ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq); - do_gettimeofday(&end); - GPU_CLK_DBG(); - -#ifndef AML_CLK_LOCK_ERROR - clk_disable_unprepare(clk_mali_x_old); -#endif - time_use = (end.tv_sec - start.tv_sec)*1000000 + end.tv_usec - start.tv_usec; - GPU_CLK_DBG("step 1, mali_mux use: %ld us\n", time_use); - - return 0; -} - -int mali_clock_set(unsigned int clock) -{ - return mali_clock_critical(critical_clock_set, (size_t)clock); -} - -void disable_clock(void) -{ -#ifndef AML_CLK_LOCK_ERROR - struct clk *clk_mali = pmali_plat->clk_mali; - - GPU_CLK_DBG(); - clk_disable_unprepare(clk_mali); -#endif - GPU_CLK_DBG(); -} - -void enable_clock(void) -{ -#ifndef AML_CLK_LOCK_ERROR - struct clk *clk_mali = pmali_plat->clk_mali; - - clk_prepare_enable(clk_mali); -#endif - GPU_CLK_DBG(); -} - -u32 get_mali_freq(u32 idx) -{ - if (!mali_pm_statue) { - return pmali_plat->clk_sample[idx]; - } else { - return 0; - } -} - -void set_str_src(u32 data) -{ - printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__); -} - -int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata) -{ - struct device_node *gpu_dn = pdev->dev.of_node; - struct device_node *gpu_clk_dn; - phandle dvfs_clk_hdl; - mali_dvfs_threshold_table *dvfs_tbl = NULL; - uint32_t *clk_sample = NULL; - - struct property *prop; - const __be32 *p; - int length = 0, i = 0; - u32 u; - - int ret = 0; - if (!gpu_dn) { - dev_notice(&pdev->dev, "gpu device node not right\n"); - return -ENODEV; - } - - ret = of_property_read_u32(gpu_dn,"num_of_pp", - &mpdata->cfg_pp); - if (ret) { - dev_notice(&pdev->dev, "set max pp to default 6\n"); - mpdata->cfg_pp = 6; - } - mpdata->scale_info.maxpp = mpdata->cfg_pp; - mpdata->maxpp_sysfs = mpdata->cfg_pp; - _dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp); - - ret = 
of_property_read_u32(gpu_dn,"min_pp", - &mpdata->cfg_min_pp); - if (ret) { - dev_notice(&pdev->dev, "set min pp to default 1\n"); - mpdata->cfg_min_pp = 1; - } - mpdata->scale_info.minpp = mpdata->cfg_min_pp; - _dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp); - - ret = of_property_read_u32(gpu_dn,"min_clk", - &mpdata->cfg_min_clock); - if (ret) { - dev_notice(&pdev->dev, "set min clk default to 0\n"); - mpdata->cfg_min_clock = 0; - } - mpdata->scale_info.minclk = mpdata->cfg_min_clock; - _dev_info(&pdev->dev, "min clk is %d\n", mpdata->scale_info.minclk); - - mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3); - _dev_info(&pdev->dev, "hiu io source 0x%p\n", mpdata->reg_base_hiubus); - - mpdata->reg_base_aobus = of_iomap(gpu_dn, 2); - _dev_info(&pdev->dev, "hiu io source 0x%p\n", mpdata->reg_base_aobus); - - ret = of_property_read_u32(gpu_dn,"sc_mpp", - &mpdata->sc_mpp); - if (ret) { - dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp); - mpdata->sc_mpp = mpdata->cfg_pp; - } - _dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp); - - of_get_property(gpu_dn, "tbl", &length); - - length = length /sizeof(u32); - _dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length); - - mpdata->dvfs_table = devm_kzalloc(&pdev->dev, - sizeof(struct mali_dvfs_threshold_table)*length, - GFP_KERNEL); - dvfs_tbl = mpdata->dvfs_table; - if (mpdata->dvfs_table == NULL) { - dev_err(&pdev->dev, "failed to alloc dvfs table\n"); - return -ENOMEM; - } - mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL); - if (mpdata->clk_sample == NULL) { - dev_err(&pdev->dev, "failed to alloc clk_sample table\n"); - return -ENOMEM; - } - clk_sample = mpdata->clk_sample; - of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) { - dvfs_clk_hdl = (phandle) u; - gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl); - ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq); - if (ret) { - dev_notice(&pdev->dev, "read clk_freq failed\n"); - } - - ret = of_property_read_string(gpu_clk_dn,"clk_parent", - &dvfs_tbl->clk_parent); - if (ret) { - dev_notice(&pdev->dev, "read clk_parent failed\n"); - } else if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) { - dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent); - if (IS_ERR(dvfs_tbl->clkp_handle)) { - dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent); - } - ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq); - if (ret) { - dev_notice(&pdev->dev, "read clk_parent freq failed\n"); - } - } - - ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage); - if (ret) { - dev_notice(&pdev->dev, "read voltage failed\n"); - } - ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count); - if (ret) { - dev_notice(&pdev->dev, "read keep_count failed\n"); - } - //downthreshold and upthreshold shall be u32 - ret = of_property_read_u32_array(gpu_clk_dn,"threshold", - &dvfs_tbl->downthreshold, 2); - if (ret) { - dev_notice(&pdev->dev, "read threshold failed\n"); - } - dvfs_tbl->freq_index = i; - - *clk_sample = dvfs_tbl->clk_freq / 1000000; - - dvfs_tbl ++; - clk_sample ++; - i++; - mpdata->dvfs_table_size ++; - } - - ret = of_property_read_u32(gpu_dn,"max_clk", - &mpdata->cfg_clock); - if (ret) { - dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2); - mpdata->cfg_clock = mpdata->dvfs_table_size-2; - } - - mpdata->cfg_clock_bkup = mpdata->cfg_clock; - mpdata->maxclk_sysfs = 
mpdata->cfg_clock; - mpdata->scale_info.maxclk = mpdata->cfg_clock; - _dev_info(&pdev->dev, "max clk is %d\n", mpdata->scale_info.maxclk); - - ret = of_property_read_u32(gpu_dn,"turbo_clk", - &mpdata->turbo_clock); - if (ret) { - dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1); - mpdata->turbo_clock = mpdata->dvfs_table_size-1; - } - _dev_info(&pdev->dev, "turbo clk is %d\n", mpdata->turbo_clock); - - ret = of_property_read_u32(gpu_dn,"def_clk", - &mpdata->def_clock); - if (ret) { - mpdata->def_clock = mpdata->scale_info.maxclk; - dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock); - } - if (mpdata->def_clock > mpdata->scale_info.maxclk) - mpdata->def_clock = mpdata->scale_info.maxclk; - _dev_info(&pdev->dev, "default clk is %d\n", mpdata->def_clock); - - dvfs_tbl = mpdata->dvfs_table; - clk_sample = mpdata->clk_sample; - for (i = 0; i< mpdata->dvfs_table_size; i++) { - _dev_info(&pdev->dev, "====================%d====================\n" - "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n", - i, - dvfs_tbl->clk_freq, dvfs_tbl->clk_parent, - dvfs_tbl->voltage, dvfs_tbl->keep_count, - dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample); - dvfs_tbl ++; - clk_sample ++; - } - _dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size); - - mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux"); -#if 0 - mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_mali_0"); - mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_mali_1"); -#endif - if (IS_ERR(mpdata->clk_mali)) { - dev_err(&pdev->dev, "failed to get clock pointer\n"); - return -EFAULT; - } - - pmali_plat = mpdata; - mpdata->pdev = pdev; - return 0; -} - -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_clock.h b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_clock.h deleted file mode 100644 index 939d3b43b224..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_clock.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * mali_clock.h - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - */ - -#ifndef __MALI_CLOCK_H__ -#define __MALI_CLOCK_H__ -#include -#include -#include -#include -#include - -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29)) -#include -#endif - -#ifndef HHI_MALI_CLK_CNTL -#define HHI_MALI_CLK_CNTL 0x6C -#define mplt_read(r) readl((pmali_plat->reg_base_hiubus) + ((r)<<2)) -#define mplt_write(r, v) writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2))) -#define mplt_setbits(r, m) mplt_write((r), (mplt_read(r) | (m))); -#define mplt_clrbits(r, m) mplt_write((r), (mplt_read(r) & (~(m)))); -#endif - -//extern int mali_clock_init(struct platform_device *dev); -int mali_clock_init_clk_tree(struct platform_device *pdev); - -typedef int (*critical_t)(size_t param); -int mali_clock_critical(critical_t critical, size_t param); - -int mali_clock_init(mali_plat_info_t*); -int mali_clock_set(unsigned int index); -void disable_clock(void); -void enable_clock(void); -u32 get_mali_freq(u32 idx); -void set_str_src(u32 data); -int mali_dt_info(struct platform_device *pdev, - struct mali_plat_info_t *mpdata); -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_config_devicetree.c deleted file mode 100755 index 7ce3539826fe..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_config_devicetree.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * mali_kbase_config_devicetree.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifdef CONFIG_DEVFREQ_THERMAL -#include -#include -#include - -#define FALLBACK_STATIC_TEMPERATURE 55000 - -static unsigned long t83x_static_power(unsigned long voltage) -{ -#if 0 - struct thermal_zone_device *tz; - unsigned long temperature, temp; - unsigned long temp_squared, temp_cubed, temp_scaling_factor; - const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10); - const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; - - tz = thermal_zone_get_zone_by_name("gpu"); - if (IS_ERR(tz)) { - pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n", - PTR_ERR(tz)); - temperature = FALLBACK_STATIC_TEMPERATURE; - } else { - int ret; - - ret = tz->ops->get_temp(tz, &temperature); - if (ret) { - pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", - ret); - temperature = FALLBACK_STATIC_TEMPERATURE; - } - } - - /* Calculate the temperature scaling factor. To be applied to the - * voltage scaled power. - */ - temp = temperature / 1000; - temp_squared = temp * temp; - temp_cubed = temp_squared * temp; - temp_scaling_factor = - (2 * temp_cubed) - - (80 * temp_squared) - + (4700 * temp) - + 32000; - - return (((coefficient * voltage_cubed) >> 20) - * temp_scaling_factor) - / 1000000; -#else - return 0; -#endif -} - -static unsigned long t83x_dynamic_power(unsigned long freq, - unsigned long voltage) -{ - /* The inputs: freq (f) is in Hz, and voltage (v) in mV. 
- * The coefficient (c) is in mW/(MHz mV mV). - * - * This function calculates the dynamic power after this formula: - * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) - */ - const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ - const unsigned long f_mhz = freq / 1000000; /* MHz */ - const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ - - return (coefficient * v2 * f_mhz) / 1000000; /* mW */ -} -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)) -struct devfreq_cooling_ops t83x_model_ops = { -#else -struct devfreq_cooling_power t83x_model_ops = { -#endif - .get_static_power = t83x_static_power, - .get_dynamic_power = t83x_dynamic_power, -}; - -#endif - -#include - -int kbase_platform_early_init(void) -{ - /* Nothing needed at this stage */ - return 0; -} - -static struct kbase_platform_config dummy_platform_config; - -struct kbase_platform_config *kbase_get_platform_config(void) -{ - return &dummy_platform_config; -} - -#ifndef CONFIG_OF -int kbase_platform_register(void) -{ - return 0; -} - -void kbase_platform_unregister(void) -{ -} -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_config_platform.h deleted file mode 100755 index 233a18ebfaa2..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_config_platform.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * mali_kbase_config_platform.h - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -/** - * Maximum frequency GPU will be clocked at. Given in kHz. - * This must be specified as there is no default value. - * - * Attached value: number in kHz - * Default value: NA - */ -#define GPU_FREQ_KHZ_MAX (750000) -/** - * Minimum frequency GPU will be clocked at. Given in kHz. - * This must be specified as there is no default value. - * - * Attached value: number in kHz - * Default value: NA - */ -#define GPU_FREQ_KHZ_MIN (100000) - -/** - * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock - * - * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func - * for the function prototype. - * - * Attached value: A kbase_cpu_clk_speed_func. - * Default Value: NA - */ -#define CPU_SPEED_FUNC (NULL) - -/** - * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock - * - * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func - * for the function prototype. - * - * Attached value: A kbase_gpu_clk_speed_func. 
- * Default Value: NA - */ -#define GPU_SPEED_FUNC (NULL) - -/** - * Power management configuration - * - * Attached value: pointer to @ref kbase_pm_callback_conf - * Default value: See @ref kbase_pm_callback_conf - */ -#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) - -/** - * Platform specific configuration functions - * - * Attached value: pointer to @ref kbase_platform_funcs_conf - * Default value: See @ref kbase_platform_funcs_conf - */ -extern struct kbase_platform_funcs_conf dt_funcs_conf; -#define PLATFORM_FUNCS (&dt_funcs_conf) - -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#ifdef CONFIG_DEVFREQ_THERMAL -#define POWER_MODEL_CALLBACKS (&t83x_model_ops) -extern struct devfreq_cooling_ops t83x_model_ops; -#else -#define POWER_MODEL_CALLBACKS (NULL) -#endif -extern struct kbase_pm_callback_conf pm_callbacks; - -void mali_dev_freeze(void); -void mali_dev_restore(void); - -/** - * Autosuspend delay - * - * The delay time (in milliseconds) to be used for autosuspend - */ -#define AUTO_SUSPEND_DELAY (100) diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_meson.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_meson.c deleted file mode 100755 index 7658626000c9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_meson.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * mali_kbase_meson.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include "mali_kbase_meson.h" - -int meson_gpu_reset(struct kbase_device *kbdev) -{ - struct meson_context *platform = kbdev->platform_context; - void __iomem *reg_base_reset = platform->reg_base_reset; - u32 value; - - //JOHNT - // Level reset mail - - // Level reset mail - //writel(~(0x1<<14), reg_base_reset + P_RESET2_MASK * 4); - //writel(~(0x1<<14), reg_base_reset + P_RESET2_LEVEL * 4); - - //writel(0xffffffff, reg_base_reset + P_RESET2_LEVEL * 4); - //writel(0xffffffff, reg_base_reset + P_RESET0_LEVEL * 4); - - MESON_PRINT("%s, %d\n", __func__, __LINE__); - MESON_PRINT("reg_base=%p, reset0=%p\n", reg_base_reset, reg_base_reset + RESET0_MASK * 4); - udelay(100); -#if 0 - value = readl(reg_base_reset + RESET0_REGISTER * 4); - MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value); -#endif - - udelay(100); - MESON_PRINT("%s, %d\n", __func__, __LINE__); - value = readl(reg_base_reset + RESET0_MASK * 4); - value = value & (~(0x1<<20)); - writel(value, reg_base_reset + RESET0_MASK * 4); - - udelay(100); -#if 0 - value = readl(reg_base_reset + RESET0_REGISTER * 4); - MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value); - udelay(100); -#endif - - value = readl(reg_base_reset + RESET0_LEVEL * 4); - value = value & (~(0x1<<20)); - //MESON_PRINT("line(%d), value=%x\n", __LINE__, value); - writel(value, reg_base_reset + RESET0_LEVEL * 4); - udelay(100); - -#if 0 - value = readl(reg_base_reset + RESET0_REGISTER * 4); - MESON_PRINT("line(%d), RESET0_REGISTER=%x\n", __LINE__, value); - udelay(100); -#endif - -/////////////// -#if 0 - value = readl(reg_base_reset + RESET2_REGISTER * 4); - MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value); -#endif - - udelay(100); - value = readl(reg_base_reset + RESET2_MASK * 4); - value = value & (~(0x1<<14)); - //MESON_PRINT("line(%d), value=%x\n", __LINE__, value); - writel(value, reg_base_reset + RESET2_MASK * 4); - -#if 0 - value = readl(reg_base_reset + RESET2_REGISTER * 4); - MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value); -#endif - - value = readl(reg_base_reset + RESET2_LEVEL * 4); - value = value & (~(0x1<<14)); - //MESON_PRINT("line(%d), value=%x\n", __LINE__, value); - writel(value, reg_base_reset + RESET2_LEVEL * 4); - udelay(100); - -#if 0 - value = readl(reg_base_reset + RESET2_REGISTER * 4); - MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value); -#endif - - udelay(100); - value = readl(reg_base_reset + RESET0_LEVEL * 4); - value = value | ((0x1<<20)); - //MESON_PRINT("line(%d), value=%x\n", __LINE__, value); - writel(value, reg_base_reset + RESET0_LEVEL * 4); -#if 0 - value = readl(reg_base_reset + RESET2_REGISTER * 4); - MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value); -#endif - - udelay(100); - value = readl(reg_base_reset + RESET2_LEVEL * 4); - value = value | ((0x1<<14)); - //MESON_PRINT("line(%d), value=%x\n", __LINE__, value); - writel(value, reg_base_reset + RESET2_LEVEL * 4); -#if 0 - value = readl(reg_base_reset + RESET2_REGISTER * 4); - MESON_PRINT("line(%d), RESET2_REGISTER=%x\n", __LINE__, value); -#endif - - udelay(10); // OR POLL for reset done - - return 0; -} - -void meson_gpu_pwr_on(struct kbase_device *kbdev, u32 mask) -{ - u32 part1_done = 0; - u32 value = 0; - u32 count = 0; - - kbdev->pm.backend.gpu_powered = true; - MESON_PRINT("%s, %d begin\n", __func__,__LINE__); - -#if 0 - value = 0x10 | (0x1<<16); -#else - value = 0xfff | (0x20<<16); -#endif - while (part1_done != value) 
{ - Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819); - Mali_WrReg(GPU_CONTROL_REG(PWR_OVERRIDE1), value); - part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1)); - MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done); - udelay(20); - count ++; - if (0 == (count %100)) MESON_PRINT("write again, count%d\n", count); - } - part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1)); - MESON_PRINT("write again, count=%d, overrider1=%x\n", count, part1_done); - - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts - - MESON_PRINT("%s, %d\n", __func__,__LINE__); - if ((mask & 0x1) != 0 ) { - MESON_PRINT("%s, %d\n", __func__,__LINE__); - Mali_WrReg(0x00000190, 0xffffffff); // Power on all cores - Mali_WrReg(0x00000194, 0xffffffff); // Power on all cores - Mali_WrReg(0x000001a0, 0xffffffff); // Power on all cores - Mali_WrReg(0x000001a4, 0xffffffff); // Power on all cores - } - MESON_PRINT("power on %d\n", __LINE__); - - if ( (mask >> 1) != 0 ) { - Mali_WrReg(0x00000180, mask >> 1); // Power on all cores - Mali_WrReg(0x00000184, 0x0); // Power on all cores - MESON_PRINT("%s, %d\n", __func__,__LINE__); - } - - MESON_PRINT("%s, %d\n", __func__,__LINE__); - if ( mask != 0 ) { - MESON_PRINT("%s, %d\n", __func__,__LINE__); - udelay(10); - part1_done = Mali_RdReg(0x0000020); - while(part1_done ==0) { - part1_done = Mali_RdReg(0x00000020); - udelay(10); - } - - MESON_PRINT("%s, %d\n", __func__,__LINE__); - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts - } - MESON_PRINT("%s, %d end\n", __func__,__LINE__); -} - -void meson_gpu_pwr_off(struct kbase_device *kbdev, u32 mask) -{ -#if 1 - u32 part1_done; - part1_done = 0; - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts - - if ( (mask >> 1) != 0 ) { - Mali_WrReg(0x000001C0, mask >> 1); // Power off all cores - Mali_WrReg(0x000001C4, 0x0); // Power off all cores - } - - if ( (mask & 0x1) != 0 ) { - Mali_WrReg(0x000001D0, 0xffffffff); // Power off all cores - Mali_WrReg(0x000001D4, 0xffffffff); // Power off all cores - Mali_WrReg(0x000001E0, 0xffffffff); // Power off all cores - Mali_WrReg(0x000001E4, 0xffffffff); // Power off all cores - } - - if ( mask != 0 ) { - part1_done = Mali_RdReg(0x0000020); - while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); } - MESON_PRINT("Mali_pwr_off:gpu_irq : %x\n", part1_done); - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts - } -#endif -} - - - - -static int kbase_platform_meson_init(struct kbase_device *kbdev) -{ -#if 0 - int err; -#endif - struct device_node *gpu_dn = kbdev->dev->of_node; -#ifdef CONFIG_MALI_MIDGARD_DVFS - unsigned long flags; -#endif /* CONFIG_MALI_MIDGARD_DVFS */ - struct meson_context *platform; - u32 part1_done = 0; - - platform = kmalloc(sizeof(struct meson_context), GFP_KERNEL); - - if (!platform) - return -ENOMEM; - - memset(platform, 0, sizeof(struct meson_context)); - - kbdev->platform_context = (void *) platform; - - platform->reg_base_reset = of_iomap(gpu_dn, 1); - _dev_info(kbdev->dev, "reset io source 0x%p\n",platform->reg_base_reset); - - platform->reg_base_aobus = of_iomap(gpu_dn, 2); - _dev_info(kbdev->dev, "ao io source 0x%p\n", platform->reg_base_aobus); - - platform->reg_base_hiubus = of_iomap(gpu_dn, 3); - _dev_info(kbdev->dev, "hiu io source 0x%p\n", platform->reg_base_hiubus); - - platform->clk_mali = devm_clk_get(kbdev->dev, "gpu_mux"); - if (IS_ERR(platform->clk_mali)) { - dev_err(kbdev->dev, "failed to get clock pointer\n"); - } else { - clk_prepare_enable(platform->clk_mali); - clk_set_rate(platform->clk_mali, 285000000); - } 
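meson_gpu_pwr_on() above gates every PWR_OVERRIDE1 update behind the magic PWR_KEY write and keeps re-issuing the pair until the override register reads back the requested value. Distilled into one helper for clarity (a sketch only; Mali_WrReg/Mali_RdReg, GPU_CONTROL_REG and the 0x2968A819 key are the ones already used in this file):

static void meson_pwr_override1_set(struct kbase_device *kbdev, u32 value)
{
        u32 readback;

        do {
                /* unlock PWR_OVERRIDE1, write it, then check the write stuck */
                Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
                Mali_WrReg(GPU_CONTROL_REG(PWR_OVERRIDE1), value);
                readback = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1));
                udelay(20);
        } while (readback != value);
}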
- MESON_PRINT("%s, %d begin\n", __func__, __LINE__); - meson_gpu_reset(kbdev); - meson_gpu_pwr_on(kbdev, 0xe); - - part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1)); - MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done); - meson_gpu_pwr_off(kbdev, 0xe); -#if 1 - part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1)); - MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done); - - meson_gpu_reset(kbdev); - meson_gpu_pwr_on(kbdev, 0xe); - part1_done = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1)); - MESON_PRINT("line%d, overrider1=%x\n", __LINE__, part1_done); -#endif -#if 0 - platform->cmu_pmu_status = 0; - platform->dvfs_wq = NULL; - platform->polling_speed = 100; - gpu_debug_level = DVFS_WARNING; -#endif - - mutex_init(&platform->gpu_clock_lock); - mutex_init(&platform->gpu_dvfs_handler_lock); - spin_lock_init(&platform->gpu_dvfs_spinlock); -#if 0 - err = gpu_control_module_init(kbdev); - if (err) - goto clock_init_fail; - - /* dvfs gobernor init*/ - gpu_dvfs_governor_init(kbdev, G3D_DVFS_GOVERNOR_DEFAULT); -#endif -#ifdef CONFIG_MALI_MIDGARD_DVFS - spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); - platform->wakeup_lock = 0; - spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); -#endif /* CONFIG_MALI_MIDGARD_DVFS */ -#if 0 - /* dvfs handler init*/ - gpu_dvfs_handler_init(kbdev); - - err = gpu_notifier_init(kbdev); - if (err) - goto notifier_init_fail; - - err = gpu_create_sysfs_file(kbdev->dev); - if (err) - goto sysfs_init_fail; -#endif - - MESON_PRINT("%s, %d end\n", __func__, __LINE__); - return 0; -#if 0 -clock_init_fail: -notifier_init_fail: -sysfs_init_fail: - kfree(platform); - - return err; -#endif -} - -/** - ** Meson hardware specific termination - **/ -static void kbase_platform_meson_term(struct kbase_device *kbdev) -{ - struct meson_context *platform; - platform = (struct meson_context *) kbdev->platform_context; -#if 0 - gpu_notifier_term(); - -#ifdef CONFIG_MALI_MIDGARD_DVFS - gpu_dvfs_handler_deinit(kbdev); -#endif /* CONFIG_MALI_MIDGARD_DVFS */ - - gpu_control_module_term(kbdev); -#endif - - kfree(kbdev->platform_context); - kbdev->platform_context = 0; - -#if 0 - gpu_remove_sysfs_file(kbdev->dev); -#endif -} - -struct kbase_platform_funcs_conf platform_funcs = { - .platform_init_func = &kbase_platform_meson_init, - .platform_term_func = &kbase_platform_meson_term, -}; - diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_meson.h b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_meson.h deleted file mode 100644 index 5e69f3327582..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_meson.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * mali_kbase_meson.h - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - */ - -#include -#include -#include -#include - -#define RESET0_MASK 0x00 -#define RESET1_MASK 0x01 -#define RESET2_MASK 0x02 - -#define RESET0_LEVEL 0x10 -#define RESET1_LEVEL 0x11 -#define RESET2_LEVEL 0x12 - -#define Mali_WrReg(regnum, value) writel((value),(kbdev->reg + (regnum))) -#define Mali_RdReg(regnum) readl(kbdev->reg + (regnum)) -#define MESON_PRINT(...) - -struct meson_context { - struct mutex gpu_clock_lock; - struct mutex gpu_dvfs_handler_lock; - spinlock_t gpu_dvfs_spinlock; -#ifdef CONFIG_MALI_MIDGARD_DVFS - int utilization; - int util_gl_share; - int util_cl_share[2]; -#ifdef CONFIG_CPU_THERMAL_IPA - int norm_utilisation; - int freq_for_normalisation; - unsigned long long power; -#endif /* CONFIG_CPU_THERMAL_IPA */ - int max_lock; - int min_lock; -#if 0 - int user_max_lock[NUMBER_LOCK]; - int user_min_lock[NUMBER_LOCK]; -#endif - int target_lock_type; - int down_requirement; - bool wakeup_lock; - int governor_num; - int governor_type; - char governor_list[100]; - bool dvfs_status; -#ifdef CONFIG_CPU_THERMAL_IPA - int time_tick; - u32 time_busy; - u32 time_idle; -#endif /* CONFIG_CPU_THERMAL_IPA */ -#endif - int cur_clock; - int cur_voltage; - int voltage_margin; - bool tmu_status; - int debug_level; - int polling_speed; - struct workqueue_struct *dvfs_wq; - void __iomem *reg_base_reset; - void __iomem *reg_base_aobus; - void __iomem *reg_base_hiubus; - struct clk *clk_mali; - struct clk *clk_gp; -}; - diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_runtime_pm.c deleted file mode 100755 index 83c9c5b5930d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_kbase_runtime_pm.c +++ /dev/null @@ -1,303 +0,0 @@ -/* - * mali_kbase_runtime_pm.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include "mali_kbase_config_platform.h" - -void *reg_base_hiubus = NULL; -u32 override_value_aml = 0; -static int first = 1; - -#define RESET0_MASK 0x10 -#define RESET1_MASK 0x11 -#define RESET2_MASK 0x12 - -#define RESET0_LEVEL 0x20 -#define RESET1_LEVEL 0x21 -#define RESET2_LEVEL 0x22 -#define Rd(r) readl((reg_base_hiubus) + ((r)<<2)) -#define Wr(r, v) writel((v), ((reg_base_hiubus) + ((r)<<2))) -#define Mali_WrReg(regnum, value) kbase_reg_write(kbdev, (regnum), (value)) -#define Mali_RdReg(regnum) kbase_reg_read(kbdev, (regnum)) -#define stimulus_print printk -#define stimulus_display printk -#define Mali_pwr_off(x) Mali_pwr_off_with_kdev(kbdev, (x)) - -extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); - -//[0]:CG [1]:SC0 [2]:SC2 -static void Mali_pwr_on_with_kdev ( struct kbase_device *kbdev, uint32_t mask) -{ - uint32_t part1_done; - uint32_t shader_present; - uint32_t tiler_present; - uint32_t l2_present; - - part1_done = 0; - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts - - shader_present = Mali_RdReg(0x100); - tiler_present = Mali_RdReg(0x110); - l2_present = Mali_RdReg(0x120); - printk("shader_present=%d, tiler_present=%d, l2_present=%d\n", - shader_present, tiler_present, l2_present); - - if ( mask == 0 ) { - Mali_WrReg(0x00000180, 0xffffffff); // Power on all cores (shader low) - Mali_WrReg(0x00000184, 0xffffffff); // Power on all cores (shader high) - Mali_WrReg(0x00000190, 0xffffffff); // Power on all cores (tiler low) - Mali_WrReg(0x00000194, 0xffffffff); // Power on all cores (tiler high) - Mali_WrReg(0x000001a0, 0xffffffff); // Power on all cores (l2 low) - Mali_WrReg(0x000001a4, 0xffffffff); // Power on all cores (l2 high) - } else { - Mali_WrReg(0x00000180, mask); // Power on all cores (shader low) - Mali_WrReg(0x00000184, 0); // Power on all cores (shader high) - Mali_WrReg(0x00000190, mask); // Power on all cores (tiler low) - Mali_WrReg(0x00000194, 0); // Power on all cores (tiler high) - Mali_WrReg(0x000001a0, mask); // Power on all cores (l2 low) - Mali_WrReg(0x000001a4, 0); // Power on all cores (l2 high) - } - - part1_done = Mali_RdReg(0x0000020); - while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); } - stimulus_display("Mali_pwr_on:gpu_irq : %x\n", part1_done); - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts -} - -//[0]:CG [1]:SC0 [2]:SC2 -#if 0 -static void Mali_pwr_off_with_kdev( struct kbase_device *kbdev, uint32_t mask) -{ - uint32_t part1_done; - part1_done = 0; - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts - - if ( mask == 0 ) { - Mali_WrReg(0x000001C0, 0xffffffff); // Power off all cores (tiler low) - Mali_WrReg(0x000001C4, 0xffffffff); // Power off all cores (tiler high) - Mali_WrReg(0x000001D0, 0xffffffff); // Power off all cores (l2 low) - Mali_WrReg(0x000001D4, 0xffffffff); // Power off all cores (l2 high) - Mali_WrReg(0x000001E0, 0xffffffff); // Power off all cores (shader low) - Mali_WrReg(0x000001E4, 0xffffffff); // Power off all cores (shader high) - } else { - Mali_WrReg(0x000001C0, mask); // Power off all cores (tiler low) - Mali_WrReg(0x000001C4, 0x0); // Power off all cores (tiler high) - Mali_WrReg(0x000001D0, mask); // Power off all cores (l2 low) - Mali_WrReg(0x000001D4, 0x0); // Power off all cores (l2 high) - Mali_WrReg(0x000001E0, mask); // Power off all cores (shader low) - Mali_WrReg(0x000001E4, 0x0); // Power off all cores (shader high) - } - - part1_done = Mali_RdReg(0x0000020); 
- while((part1_done ==0)) { part1_done = Mali_RdReg(0x00000020); } - stimulus_display("Mali_pwr_off:gpu_irq : %x\n", part1_done); - Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts -} -#endif - -static int pm_callback_power_on(struct kbase_device *kbdev) -{ - int ret = 1; /* Assume GPU has been powered off */ - int error; - struct platform_device *pdev = to_platform_device(kbdev->dev); - struct resource *reg_res; - u64 core_ready; - u64 l2_ready; - u64 tiler_ready; - u32 value; - - //printk("20151013, %s, %d\n", __FILE__, __LINE__); - if (first == 0) goto ret; - - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - ret = -ENOENT; - } - //printk("%s, %d\n", __FILE__, __LINE__); - if (NULL == reg_base_hiubus) - reg_base_hiubus = ioremap(reg_res->start, resource_size(reg_res)); - - //printk("%s, %d\n", __FILE__, __LINE__); - if (NULL == reg_base_hiubus) { - dev_err(kbdev->dev, "Invalid register resource\n"); - ret = -ENOENT; - } - - //printk("%s, %d\n", __FILE__, __LINE__); - -//JOHNT - // Level reset mail - - // Level reset mail - //Wr(P_RESET2_MASK, ~(0x1<<14)); - //Wr(P_RESET2_LEVEL, ~(0x1<<14)); - - //Wr(P_RESET2_LEVEL, 0xffffffff); - //Wr(P_RESET0_LEVEL, 0xffffffff); - - value = Rd(RESET0_MASK); - value = value & (~(0x1<<20)); - //printk("line(%d), value=%x\n", __LINE__, value); - Wr(RESET0_MASK, value); - - value = Rd(RESET0_LEVEL); - value = value & (~(0x1<<20)); - //printk("line(%d), value=%x\n", __LINE__, value); - Wr(RESET0_LEVEL, value); -/////////////// - value = Rd(RESET2_MASK); - value = value & (~(0x1<<14)); - //printk("line(%d), value=%x\n", __LINE__, value); - Wr(RESET2_MASK, value); - - value = Rd(RESET2_LEVEL); - value = value & (~(0x1<<14)); - //printk("line(%d), value=%x\n", __LINE__, value); - Wr(RESET2_LEVEL, value); - - value = Rd(RESET0_LEVEL); - value = value | ((0x1<<20)); - //printk("line(%d), value=%x\n", __LINE__, value); - Wr(RESET0_LEVEL, value); - - value = Rd(RESET2_LEVEL); - value = value | ((0x1<<14)); - //printk("line(%d), value=%x\n", __LINE__, value); - Wr(RESET2_LEVEL, value); - - udelay(10); // OR POLL for reset done - - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_KEY), 0x2968A819); - kbase_reg_write(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16)); - - Mali_pwr_on_with_kdev(kbdev, 0x1); - //printk("set PWR_ORRIDE, reg=%p, reg_start=%llx, reg_size=%zx, reg_mapped=%p\n", - // kbdev->reg, kbdev->reg_start, kbdev->reg_size, reg_base_hiubus); - dev_dbg(kbdev->dev, "pm_callback_power_on %p\n", - (void *)kbdev->dev->pm_domain); - - first = 0; - //printk("%s, %d\n", __FILE__, __LINE__); -ret: - error = pm_runtime_get_sync(kbdev->dev); - if (error == 1) { - /* - * Let core know that the chip has not been - * powered off, so we can save on re-initialization. 
- */ - ret = 0; - } - udelay(100); -#if 1 - - core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); - l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); - tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); - //printk("core_ready=%llx, l2_ready=%llx, tiler_ready=%llx\n", core_ready, l2_ready, tiler_ready); -#endif - dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); - - return ret; -} - -static void pm_callback_power_off(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "pm_callback_power_off\n"); - //printk("%s, %d\n", __FILE__, __LINE__); -#if 0 - iounmap(reg_base_hiubus); - reg_base_hiubus = NULL; -#endif - pm_runtime_mark_last_busy(kbdev->dev); - pm_runtime_put_autosuspend(kbdev->dev); -} - -#ifdef KBASE_PM_RUNTIME -static int kbase_device_runtime_init(struct kbase_device *kbdev) -{ - int ret = 0; - - dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); - - pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); - pm_runtime_use_autosuspend(kbdev->dev); - pm_runtime_set_active(kbdev->dev); - - dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); - pm_runtime_enable(kbdev->dev); - - if (!pm_runtime_enabled(kbdev->dev)) { - dev_warn(kbdev->dev, "pm_runtime not enabled"); - ret = -ENOSYS; - } - - return ret; -} - -static void kbase_device_runtime_disable(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n"); - pm_runtime_disable(kbdev->dev); -} -#endif -static int pm_callback_runtime_on(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "pm_callback_runtime_on\n"); - - return 0; -} - -static void pm_callback_runtime_off(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "pm_callback_runtime_off\n"); -} - -static void pm_callback_resume(struct kbase_device *kbdev) -{ - int ret = pm_callback_runtime_on(kbdev); - - WARN_ON(ret); -} - -static void pm_callback_suspend(struct kbase_device *kbdev) -{ - pm_callback_runtime_off(kbdev); -} - -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = pm_callback_suspend, - .power_resume_callback = pm_callback_resume, -#ifdef KBASE_PM_RUNTIME - .power_runtime_init_callback = kbase_device_runtime_init, - .power_runtime_term_callback = kbase_device_runtime_disable, - .power_runtime_on_callback = pm_callback_runtime_on, - .power_runtime_off_callback = pm_callback_runtime_off, -#else /* KBASE_PM_RUNTIME */ - .power_runtime_init_callback = NULL, - .power_runtime_term_callback = NULL, - .power_runtime_on_callback = NULL, - .power_runtime_off_callback = NULL, -#endif /* KBASE_PM_RUNTIME */ -}; diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_platform.h b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_platform.h deleted file mode 100644 index 724112f9c5a3..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_platform.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * mali_platform.h - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - * more details. - * - */ - -#include -#ifndef MALI_PLATFORM_H_ -#define MALI_PLATFORM_H_ - -extern u32 mali_gp_reset_fail; -extern u32 mali_core_timeout; - -#endif /* MALI_PLATFORM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_scaling.h b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_scaling.h deleted file mode 100644 index aaab5dae8b6b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mali_scaling.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * mali_scaling.h - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -/** - * @file arm_core_scaling.h - * Example core scaling policy. - */ - -#ifndef __ARM_CORE_SCALING_H__ -#define __ARM_CORE_SCALING_H__ - -#include -#include -#include -#include - -enum mali_scale_mode_t { - MALI_PP_SCALING = 0, - MALI_PP_FS_SCALING, - MALI_SCALING_DISABLE, - MALI_TURBO_MODE, - MALI_SCALING_MODE_MAX -}; - -typedef struct mali_dvfs_threshold_table { - uint32_t freq_index; - uint32_t voltage; - uint32_t keep_count; - uint32_t downthreshold; - uint32_t upthreshold; - uint32_t clk_freq; - const char *clk_parent; - struct clk *clkp_handle; - uint32_t clkp_freq; -} mali_dvfs_threshold_table; - -/** - * restrictions on frequency and number of pp. - */ -typedef struct mali_scale_info_t { - u32 minpp; - u32 maxpp; - u32 minclk; - u32 maxclk; -} mali_scale_info_t; - -/** - * Platform spesific data for meson chips. - */ -typedef struct mali_plat_info_t { - u32 cfg_pp; /* number of pp. */ - u32 cfg_min_pp; - u32 turbo_clock; /* reserved clock src. */ - u32 def_clock; /* gpu clock used most of time.*/ - u32 cfg_clock; /* max clock could be used.*/ - u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */ - u32 cfg_min_clock; - - u32 sc_mpp; /* number of pp used most of time.*/ - u32 bst_gpu; /* threshold for boosting gpu. */ - u32 bst_pp; /* threshold for boosting PP. */ - - u32 *clk; - u32 *clk_sample; - u32 clk_len; - u32 have_switch; /* have clock gate switch or not. */ - - mali_dvfs_threshold_table *dvfs_table; - u32 dvfs_table_size; - - mali_scale_info_t scale_info; - u32 maxclk_sysfs; - u32 maxpp_sysfs; - - /* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/ - u32 limit_on; - - /* for boost up gpu by user. */ - void (*plat_preheat)(void); - - struct platform_device *pdev; - void __iomem *reg_base_hiubus; - void __iomem *reg_base_aobus; - struct work_struct wq_work; - struct clk *clk_mali; - struct clk *clk_mali_0; - struct clk *clk_mali_1; -} mali_plat_info_t; -mali_plat_info_t* get_mali_plat_data(void); - -/** - * Initialize core scaling policy. - * - * @note The core scaling policy will assume that all PP cores are on initially. - * - * @param num_pp_cores Total number of PP cores. - */ -int mali_core_scaling_init(mali_plat_info_t*); - -/** - * Terminate core scaling policy. - */ -void mali_core_scaling_term(void); - -/** - * cancel and flush scaling job queue. 
- */ -void flush_scaling_job(void); - -/* get current state(pp, clk). */ -void get_mali_rt_clkpp(u32* clk, u32* pp); -u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush); -void revise_mali_rt(void); -/* get max gpu clk level of this chip*/ -int get_gpu_max_clk_level(void); - -/* get or set the scale mode. */ -u32 get_mali_schel_mode(void); -void set_mali_schel_mode(u32 mode); - -/* for frequency reporter in DS-5 streamline. */ -u32 get_current_frequency(void); -void mali_dev_freeze(void); -void mali_dev_restore(void); - -#endif /* __ARM_CORE_SCALING_H__ */ diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/meson_main2.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/meson_main2.c deleted file mode 100644 index 09feaaf1cb70..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/meson_main2.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * meson_main2.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -/** - * @file mali_platform.c - * Platform specific Mali driver functions for: - * meson8m2 and the newer chip - */ -#include -#include -#include -#include -#include -#ifdef CONFIG_PM_RUNTIME -#include -#endif -#include -#include -#include - -#include -#include -#include -#include - -#include "mali_scaling.h" -#include "mali_clock.h" -#include "meson_main2.h" - -extern void mali_post_init(void); -struct kbase_device; -//static int gpu_dvfs_probe(struct platform_device *pdev) -int platform_dt_init_func(struct kbase_device *kbdev) -{ - struct device *dev = kbdev->dev; - struct platform_device *pdev = to_platform_device(dev); - - int err = -1; - - err = mali_meson_init_start(pdev); - mali_meson_init_finish(pdev); - mpgpu_class_init(); - mali_post_init(); - return err; -} - -//static int gpu_dvfs_remove(struct platform_device *pdev) -void platform_dt_term_func(struct kbase_device *kbdev) -{ - struct device *dev = kbdev->dev; - struct platform_device *pdev = to_platform_device(dev); - - printk("%s, %d\n", __FILE__, __LINE__); - - mpgpu_class_exit(); - mali_meson_uninit(pdev); - -} - -static u32 last_utilisation, last_util_gl_share, last_util_cl_share[2]; -inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, - u32 util_gl_share, u32 util_cl_share[2]) -{ - last_utilisation = utilisation; - last_util_gl_share = util_gl_share; - last_util_cl_share[0] = util_cl_share[0]; - last_util_cl_share[1] = util_cl_share[1]; - mali_gpu_utilization_callback(utilisation*255/100); - return 1; -} - -u32 mpgpu_get_utilization(void) -{ - return last_utilisation; -} -u32 mpgpu_get_util_gl_share(void) -{ - return last_util_gl_share; -} -u32 mpgpu_get_util_cl_share(u32 *util) -{ - util[0] = last_util_cl_share[0]; - util[1] = last_util_cl_share[1]; - return 0; -} - -struct kbase_platform_funcs_conf dt_funcs_conf = { - .platform_init_func = platform_dt_init_func, - .platform_term_func = platform_dt_term_func, -}; -#if 0 -static const struct of_device_id gpu_dvfs_ids[] = { - { .compatible = "meson, gpu-dvfs-1.00.a" }, - { }, -}; 
-MODULE_DEVICE_TABLE(of, gpu_dvfs_ids); - -static struct platform_driver gpu_dvfs_driver = { - .driver = { - .name = "meson-gpu-dvfs", - .owner = THIS_MODULE, - .of_match_table = gpu_dvfs_ids, - }, - .probe = gpu_dvfs_probe, - .remove = gpu_dvfs_remove, -}; -module_platform_driver(gpu_dvfs_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Amlogic SH, MM"); -MODULE_DESCRIPTION("Driver for the Meson GPU dvfs"); -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/meson_main2.h b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/meson_main2.h deleted file mode 100644 index a7b476933144..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/meson_main2.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * meson_main2.h - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifndef MESON_MAIN_H_ -#define MESON_MAIN_H_ -#include -#include -#include -#ifdef CONFIG_PM_RUNTIME -#include -#endif -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29)) -#include -#endif - -#include "mali_scaling.h" -#include "mali_clock.h" - -u32 set_max_mali_freq(u32 idx); -u32 get_max_mali_freq(void); - -int mali_meson_init_start(struct platform_device* ptr_plt_dev); -int mali_meson_init_finish(struct platform_device* ptr_plt_dev); -int mali_meson_uninit(struct platform_device* ptr_plt_dev); -int mpgpu_class_init(void); -void mpgpu_class_exit(void); -void mali_gpu_utilization_callback(int utilization_pp); - -u32 mpgpu_get_utilization(void); -u32 mpgpu_get_util_gl_share(void); -u32 mpgpu_get_util_cl_share(u32 *util); -u32 mpgpu_get_gpu_err_count(void); - -#endif /* MESON_MAIN_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mpgpu.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mpgpu.c deleted file mode 100644 index 6cbeb0cb2043..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/mpgpu.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * mpgpu.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - */ - -/* Standard Linux headers */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29)) -#include -#include -#include -#endif - -//#include -#include "meson_main2.h" - -int meson_gpu_data_invalid_count = 0; -int meson_gpu_fault = 0; - -static ssize_t domain_stat_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - unsigned int val; - mali_plat_info_t* pmali_plat = get_mali_plat_data(); - - val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff; - return sprintf(buf, "%x\n", val>>4); - return 0; -} - -#define PREHEAT_CMD "preheat" -#define PLL2_CMD "mpl2" /* mpl2 [11] or [0xxxxxxx] */ -#define SCMPP_CMD "scmpp" /* scmpp [number of pp your want in most of time]. */ -#define BSTGPU_CMD "bstgpu" /* bstgpu [0-256] */ -#define BSTPP_CMD "bstpp" /* bstpp [0-256] */ -#define LIMIT_CMD "lmt" /* lmt [0 or 1] */ -#define MAX_TOKEN 20 -#define FULL_UTILIZATION 256 - -static ssize_t mpgpu_write(struct class *class, - struct class_attribute *attr, const char *buf, size_t count) -{ - char *pstart, *cprt = NULL; - u32 val = 0; - mali_plat_info_t* pmali_plat = get_mali_plat_data(); - - cprt = skip_spaces(buf); - pstart = strsep(&cprt," "); - if (strlen(pstart) < 1) - goto quit; - - if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) { - if (pmali_plat->plat_preheat) { - pmali_plat->plat_preheat(); - } - } else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) { - int base = 10; - if ((strlen(cprt) > 2) && (cprt[0] == '0') && - (cprt[1] == 'x' || cprt[1] == 'X')) - base = 16; - if (kstrtouint(cprt, base, &val) <0) - goto quit; - if (val < 11) - pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup; - else - pmali_plat->cfg_clock = pmali_plat->turbo_clock; - pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; - set_str_src(val); - } else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) { - if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) - goto quit; - if ((val > 0) && (val < pmali_plat->cfg_pp)) { - pmali_plat->sc_mpp = val; - } - } else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) { - if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) - goto quit; - if ((val > 0) && (val < FULL_UTILIZATION)) { - pmali_plat->bst_gpu = val; - } - } else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) { - if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) - goto quit; - if ((val > 0) && (val < FULL_UTILIZATION)) { - pmali_plat->bst_pp = val; - } - } else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) { - if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) - goto quit; - - if (val < 2) { - pmali_plat->limit_on = val; - if (val == 0) { - pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; - pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp; - revise_mali_rt(); - } - } - } -quit: - return count; -} - -static ssize_t scale_mode_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", get_mali_schel_mode()); -} - -static ssize_t scale_mode_write(struct class *class, - struct class_attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned int val; - - ret = kstrtouint(buf, 10, &val); - if (0 != ret) - { - return -EINVAL; - } - - set_mali_schel_mode(val); - - return count; -} - -static ssize_t max_freq_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - mali_plat_info_t* pmali_plat = get_mali_plat_data(); - printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n", - pmali_plat->scale_info.maxclk, 
pmali_plat->maxclk_sysfs, get_gpu_max_clk_level()); - return sprintf(buf, "%d\n", get_gpu_max_clk_level()); -} - -static ssize_t max_freq_write(struct class *class, - struct class_attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned int val; - mali_plat_info_t* pmali_plat; - mali_scale_info_t* pinfo; - - pmali_plat = get_mali_plat_data(); - pinfo = &pmali_plat->scale_info; - - ret = kstrtouint(buf, 10, &val); - if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk)) - return -EINVAL; - - pmali_plat->maxclk_sysfs = val; - pinfo->maxclk = val; - revise_mali_rt(); - - return count; -} - -static ssize_t min_freq_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - mali_plat_info_t* pmali_plat = get_mali_plat_data(); - return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk); -} - -static ssize_t min_freq_write(struct class *class, - struct class_attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned int val; - mali_plat_info_t* pmali_plat; - mali_scale_info_t* pinfo; - - pmali_plat = get_mali_plat_data(); - pinfo = &pmali_plat->scale_info; - - ret = kstrtouint(buf, 10, &val); - if ((0 != ret) || (val > pinfo->maxclk)) - return -EINVAL; - - pinfo->minclk = val; - revise_mali_rt(); - - return count; -} - -static ssize_t freq_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", get_current_frequency()); -} - -static ssize_t freq_write(struct class *class, - struct class_attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned int val; - u32 clk, pp; - get_mali_rt_clkpp(&clk, &pp); - - ret = kstrtouint(buf, 10, &val); - if (0 != ret) - return -EINVAL; - - set_mali_rt_clkpp(val, pp, 1); - - return count; -} - -static ssize_t utilization_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", mpgpu_get_utilization()); -} - -static ssize_t util_gl_share_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", mpgpu_get_util_gl_share()); -} - -static ssize_t util_cl_share_read(struct class *class, - struct class_attribute *attr, char *buf) -{ - u32 val[2]; - - mpgpu_get_util_cl_share(val); - - return sprintf(buf, "%d %d\n", val[0], val[1]); -} - -u32 mpgpu_get_gpu_err_count(void) -{ - return (meson_gpu_fault + meson_gpu_data_invalid_count); -} - -static ssize_t meson_gpu_get_err_count(struct class *class, - struct class_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", mpgpu_get_gpu_err_count()); -} - -static ssize_t mpgpu_set_err_count(struct class *class, - struct class_attribute *attr, const char *buf, size_t count) -{ - int ret; - unsigned int val; - - ret = kstrtouint(buf, 10, &val); - if (0 != ret) - return -EINVAL; - - meson_gpu_fault = val; - - return count; -} - -static struct class_attribute mali_class_attrs[] = { - __ATTR(domain_stat, 0644, domain_stat_read, NULL), - __ATTR(mpgpucmd, 0644, NULL, mpgpu_write), - __ATTR(scale_mode, 0644, scale_mode_read, scale_mode_write), - __ATTR(min_freq, 0644, min_freq_read, min_freq_write), - __ATTR(max_freq, 0644, max_freq_read, max_freq_write), - __ATTR(cur_freq, 0644, freq_read, freq_write), - __ATTR(utilization, 0644, utilization_read, NULL), - __ATTR(util_gl, 0644, util_gl_share_read, NULL), - __ATTR(util_cl, 0644, util_cl_share_read, NULL), - __ATTR(gpu_err, 0644, meson_gpu_get_err_count, mpgpu_set_err_count), -}; - -static struct class mpgpu_class = { - .name = "mpgpu", -}; - -int mpgpu_class_init(void) -{ 
- int ret = 0; - int i; - int attr_num = ARRAY_SIZE(mali_class_attrs); - - ret = class_register(&mpgpu_class); - if (ret) { - printk(KERN_ERR "%s: class_register failed\n", __func__); - return ret; - } - for (i = 0; i< attr_num; i++) { - ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]); - if (ret) { - printk(KERN_ERR "%d ST: class item failed to register\n", i); - } - } - return ret; -} - -void mpgpu_class_exit(void) -{ - class_unregister(&mpgpu_class); -} - diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/platform_gx.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/platform_gx.c deleted file mode 100644 index ac49caa3b945..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/platform_gx.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * platform_gx.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#include -#include -#include -#include -#include /* kernel module definitions */ -#include /* request_mem_region */ -#include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29)) -#include -#include -#include -#endif -#include -#ifdef CONFIG_AMLOGIC_GPU_THERMAL -#include -#include -//#include -#include -#endif - -#include "mali_scaling.h" -#include "mali_clock.h" -#include "meson_main2.h" - -/* - * For Meson 8 M2. - * - */ -static void mali_plat_preheat(void); -static mali_plat_info_t mali_plat_data = { - .bst_gpu = 210, /* threshold for boosting gpu. */ - .bst_pp = 160, /* threshold for boosting PP. 
*/ - .have_switch = 1, - .limit_on = 1, - .plat_preheat = mali_plat_preheat, -}; - -static void mali_plat_preheat(void) -{ -#ifndef CONFIG_MALI_DVFS - u32 pre_fs; - u32 clk, pp; - - if (get_mali_schel_mode() != MALI_PP_FS_SCALING) - return; - - get_mali_rt_clkpp(&clk, &pp); - pre_fs = mali_plat_data.def_clock + 1; - if (clk < pre_fs) - clk = pre_fs; - if (pp < mali_plat_data.sc_mpp) - pp = mali_plat_data.sc_mpp; - set_mali_rt_clkpp(clk, pp, 1); -#endif -} - -mali_plat_info_t* get_mali_plat_data(void) { - return &mali_plat_data; -} - -int get_mali_freq_level(int freq) -{ - int i = 0, level = -1; - int mali_freq_num; - - if (freq < 0) - return level; - - mali_freq_num = mali_plat_data.dvfs_table_size - 1; - if (freq <= mali_plat_data.clk_sample[0]) - level = mali_freq_num-1; - else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1]) - level = 0; - else { - for (i=0; i= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) { - level = i; - level = mali_freq_num-level - 1; - } - } - } - return level; -} - -unsigned int get_mali_max_level(void) -{ - return mali_plat_data.dvfs_table_size - 1; -} - -int get_gpu_max_clk_level(void) -{ - return mali_plat_data.cfg_clock; -} - -#ifdef CONFIG_AMLOGIC_GPU_THERMAL -static void set_limit_mali_freq(u32 idx) -{ - if (mali_plat_data.limit_on == 0) - return; - if (idx > mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk) - return; - if (idx > mali_plat_data.maxclk_sysfs) { - printk("idx > max freq\n"); - return; - } - mali_plat_data.scale_info.maxclk= idx; - revise_mali_rt(); -} - -static u32 get_limit_mali_freq(void) -{ - return mali_plat_data.scale_info.maxclk; -} - -#ifdef CONFIG_DEVFREQ_THERMAL -static u32 get_mali_utilization(void) -{ -#ifndef MESON_DRV_BRING - return 55; -#else - return (_mali_ukk_utilization_pp() * 100) / 256; -#endif -} -#endif -#endif - -#ifdef CONFIG_AMLOGIC_GPU_THERMAL -static u32 set_limit_pp_num(u32 num) -{ - u32 ret = -1; - if (mali_plat_data.limit_on == 0) - goto quit; - if (num > mali_plat_data.cfg_pp || - num < mali_plat_data.scale_info.minpp) - goto quit; - - if (num > mali_plat_data.maxpp_sysfs) { - printk("pp > sysfs set pp\n"); - goto quit; - } - - mali_plat_data.scale_info.maxpp = num; - revise_mali_rt(); - ret = 0; -quit: - return ret; -} -#ifdef CONFIG_DEVFREQ_THERMAL -static u32 mali_get_online_pp(void) -{ - unsigned int val; - mali_plat_info_t* pmali_plat = get_mali_plat_data(); - - val = readl(pmali_plat->reg_base_aobus + 0xf0) & 0xff; - if (val == 0x07) /* No pp is working */ - return 0; - -#ifndef MESON_DRV_BRING - return 2; -#else - return mali_executor_get_num_cores_enabled(); -#endif -} -#endif -#endif - -int mali_meson_init_start(struct platform_device* ptr_plt_dev) -{ - //dev_set_drvdata(&ptr_plt_dev->dev, &mali_plat_data); - mali_dt_info(ptr_plt_dev, &mali_plat_data); - mali_clock_init_clk_tree(ptr_plt_dev); - return 0; -} - -int mali_meson_init_finish(struct platform_device* ptr_plt_dev) -{ - if (mali_core_scaling_init(&mali_plat_data) < 0) - return -1; - return 0; -} - -int mali_meson_uninit(struct platform_device* ptr_plt_dev) -{ - mali_core_scaling_term(); - return 0; -} - -void mali_post_init(void) -{ -#ifdef CONFIG_AMLOGIC_GPU_THERMAL - int err; - struct gpufreq_cooling_device *gcdev = NULL; - struct gpucore_cooling_device *gccdev = NULL; - - gcdev = gpufreq_cooling_alloc(); - register_gpu_freq_info(get_current_frequency); - if (IS_ERR(gcdev)) - printk("malloc gpu cooling buffer error!!\n"); - else if (!gcdev) - printk("system does not enable thermal 
driver\n"); - else { - gcdev->get_gpu_freq_level = get_mali_freq_level; - gcdev->get_gpu_max_level = get_mali_max_level; - gcdev->set_gpu_freq_idx = set_limit_mali_freq; - gcdev->get_gpu_current_max_level = get_limit_mali_freq; -#ifdef CONFIG_DEVFREQ_THERMAL - gcdev->get_gpu_freq = get_mali_freq; - gcdev->get_gpu_loading = get_mali_utilization; - gcdev->get_online_pp = mali_get_online_pp; -#endif - err = gpufreq_cooling_register(gcdev); -#ifdef CONFIG_DEVFREQ_THERMAL - meson_gcooldev_min_update(gcdev->cool_dev); -#endif - if (err < 0) - printk("register GPU cooling error\n"); - printk("gpu cooling register okay with err=%d\n",err); - } - - gccdev = gpucore_cooling_alloc(); - if (IS_ERR(gccdev)) - printk("malloc gpu core cooling buffer error!!\n"); - else if (!gccdev) - printk("system does not enable thermal driver\n"); - else { - gccdev->max_gpu_core_num=mali_plat_data.cfg_pp; - gccdev->set_max_pp_num=set_limit_pp_num; - err = (int)gpucore_cooling_register(gccdev); -#ifdef CONFIG_DEVFREQ_THERMAL - meson_gcooldev_min_update(gccdev->cool_dev); -#endif - if (err < 0) - printk("register GPU cooling error\n"); - printk("gpu core cooling register okay with err=%d\n",err); - } -#endif -} diff --git a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/scaling.c b/drivers/gpu/drm/bifrost/midgard/platform/devicetree/scaling.c deleted file mode 100644 index 081a97b07a95..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/devicetree/scaling.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * scaling.c - * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -/** - * @file arm_core_scaling.c - * Example core scaling policy. - */ - -#include -#include -#include - -#if AMLOGIC_GPU_USE_GPPLL -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16) -#include -#else -#include -#endif -#endif - -#define LOG_MALI_SCALING 1 -#include "meson_main2.h" -#include "mali_clock.h" - -static int currentStep; -#ifndef CONFIG_MALI_DVFS -static int num_cores_enabled; -static int lastStep; -static struct work_struct wq_work; -static mali_plat_info_t* pmali_plat = NULL; -#endif -static int scaling_mode = MALI_PP_FS_SCALING; -extern int mali_pm_statue; -//static int scaling_mode = MALI_SCALING_DISABLE; -//static int scaling_mode = MALI_PP_SCALING; - -#if AMLOGIC_GPU_USE_GPPLL -static struct gp_pll_user_handle_s *gp_pll_user_gpu; -static int is_gp_pll_get; -static int is_gp_pll_put; -#endif -static unsigned scaling_dbg_level = 0; -module_param(scaling_dbg_level, uint, 0644); -MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level"); - -#define scalingdbg(level, fmt, arg...) 
\ - do { \ - if (scaling_dbg_level >= (level)) \ - printk(fmt , ## arg); \ - } while (0) - -#ifndef CONFIG_MALI_DVFS -static inline void mali_clk_exected(void) -{ - mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table; - uint32_t execStep = currentStep; - mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep]; - - //if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return; - if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) || - (pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){ - return; - } - -#if AMLOGIC_GPU_USE_GPPLL - if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) { - gp_pll_request(gp_pll_user_gpu); - if (!is_gp_pll_get) { - //printk("not get pll\n"); - execStep = currentStep - 1; - } - } else { - //not get the gp pll, do need put - is_gp_pll_get = 0; - is_gp_pll_put = 0; - gp_pll_release(gp_pll_user_gpu); - } -#else - if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) && - !IS_ERR(dvfs_tbl->clkp_handle) && - (0 != dvfs_tbl->clkp_freq)) { - clk_prepare_enable(dvfs_tbl->clkp_handle); - clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq); - } - -#endif - //mali_dev_pause(); - mali_clock_set(pdvfs[execStep].freq_index); - //mali_dev_resume(); -#if AMLOGIC_GPU_USE_GPPLL==0 - if ((0 == strcmp(pdvfs[lastStep].clk_parent,"gp0_pll")) && - (0 != strcmp(pdvfs[execStep].clk_parent, "gp0_pll"))) { - clk_disable_unprepare(pdvfs[lastStep].clkp_handle); - } -#endif - - lastStep = execStep; -#if AMLOGIC_GPU_USE_GPPLL - if (is_gp_pll_put) { - //printk("release gp0 pll\n"); - gp_pll_release(gp_pll_user_gpu); - gp_pll_request(gp_pll_user_gpu); - is_gp_pll_get = 0; - is_gp_pll_put = 0; - } -#endif - -} -#if AMLOGIC_GPU_USE_GPPLL -static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user, - int event) -{ - if (event == GP_PLL_USER_EVENT_GRANT) { - //printk("granted\n"); - is_gp_pll_get = 1; - is_gp_pll_put = 0; - schedule_work(&wq_work); - } else if (event == GP_PLL_USER_EVENT_YIELD) { - //printk("ask for yield\n"); - is_gp_pll_get = 0; - is_gp_pll_put = 1; - schedule_work(&wq_work); - } - - return 0; -} -#endif - -int mali_perf_set_num_pp_cores(int cores) -{ - cores = cores; - return 0; -} - -static void do_scaling(struct work_struct *work) -{ - mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table; - int err = mali_perf_set_num_pp_cores(num_cores_enabled); - if (err < 0) scalingdbg(1, "set pp failed"); - - scalingdbg(1, "set pp cores to %d\n", num_cores_enabled); - scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n", - currentStep, pdvfs[currentStep].freq_index, - lastStep, pdvfs[lastStep].freq_index); - mali_clk_exected(); -#ifdef CONFIG_MALI400_PROFILING - _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | - MALI_PROFILING_EVENT_CHANNEL_GPU | - MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, - get_current_frequency(), - 0, 0, 0, 0); -#endif -} -#endif - -u32 revise_set_clk(u32 val, u32 flush) -{ - u32 ret = 0; -#ifndef CONFIG_MALI_DVFS - mali_scale_info_t* pinfo; - - pinfo = &pmali_plat->scale_info; - - if (val < pinfo->minclk) - val = pinfo->minclk; - else if (val > pinfo->maxclk) - val = pinfo->maxclk; - - if (val != currentStep) { - currentStep = val; - if (flush) - schedule_work(&wq_work); - else - ret = 1; - } -#endif - return ret; -} - -void get_mali_rt_clkpp(u32* clk, u32* pp) -{ -#ifndef CONFIG_MALI_DVFS - *clk = currentStep; - *pp = num_cores_enabled; -#endif -} - -u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush) -{ - u32 ret = 0; -#ifndef CONFIG_MALI_DVFS - 
mali_scale_info_t* pinfo; - u32 flush_work = 0; - - pinfo = &pmali_plat->scale_info; - if (clk < pinfo->minclk) - clk = pinfo->minclk; - else if (clk > pinfo->maxclk) - clk = pinfo->maxclk; - - if (clk != currentStep) { - currentStep = clk; - if (flush) - flush_work++; - else - ret = 1; - } - if (pp < pinfo->minpp) - pp = pinfo->minpp; - else if (pp > pinfo->maxpp) - pp = pinfo->maxpp; - - if (pp != num_cores_enabled) { - num_cores_enabled = pp; - if (flush) - flush_work++; - else - ret = 1; - } - - if (flush_work) - schedule_work(&wq_work); -#endif - return ret; -} - -void revise_mali_rt(void) -{ -#ifndef CONFIG_MALI_DVFS - set_mali_rt_clkpp(currentStep, num_cores_enabled, 1); -#endif -} - -void flush_scaling_job(void) -{ -#ifndef CONFIG_MALI_DVFS - cancel_work_sync(&wq_work); -#endif -} - -#ifndef CONFIG_MALI_DVFS -static u32 enable_one_core(void) -{ - scalingdbg(2, "meson: one more pp, curent has %d pp cores\n", num_cores_enabled + 1); - return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0); -} - -static u32 disable_one_core(void) -{ - scalingdbg(2, "meson: disable one pp, current has %d pp cores\n", num_cores_enabled - 1); - return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0); -} - -static u32 enable_max_num_cores(void) -{ - return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0); -} - -static u32 enable_pp_cores(u32 val) -{ - scalingdbg(2, "meson: enable %d pp cores\n", val); - return set_mali_rt_clkpp(currentStep, val, 0); -} -#endif - -int mali_core_scaling_init(mali_plat_info_t *mali_plat) -{ -#ifndef CONFIG_MALI_DVFS - if (mali_plat == NULL) { - scalingdbg(2, " Mali platform data is NULL!!!\n"); - return -1; - } - - pmali_plat = mali_plat; - printk("mali_plat=%p\n", mali_plat); - num_cores_enabled = pmali_plat->sc_mpp; -#if AMLOGIC_GPU_USE_GPPLL - gp_pll_user_gpu = gp_pll_user_register("gpu", 1, - gp_pll_user_cb_gpu); - //not get the gp pll, do need put - is_gp_pll_get = 0; - is_gp_pll_put = 0; - if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n"); -#endif - - currentStep = pmali_plat->def_clock; - lastStep = currentStep; - INIT_WORK(&wq_work, do_scaling); -#endif - return 0; - /* NOTE: Mali is not fully initialized at this point. */ -} - -void mali_core_scaling_term(void) -{ -#ifndef CONFIG_MALI_DVFS - flush_scheduled_work(); -#if AMLOGIC_GPU_USE_GPPLL - gp_pll_user_unregister(gp_pll_user_gpu); -#endif -#endif -} - -#ifndef CONFIG_MALI_DVFS -static u32 mali_threshold [] = { - 40, /* 40% */ - 50, /* 50% */ - 90, /* 90% */ -}; -#endif - -void mali_pp_scaling_update(int utilization_pp) -{ -#ifndef CONFIG_MALI_DVFS - int ret = 0; - - if (mali_threshold[2] < utilization_pp) - ret = enable_max_num_cores(); - else if (mali_threshold[1]< utilization_pp) - ret = enable_one_core(); - else if (0 < utilization_pp) - ret = disable_one_core(); - if (ret == 1) - schedule_work(&wq_work); -#endif -} - -#if LOG_MALI_SCALING -void trace_utilization(int utilization_gpu, u32 current_idx, u32 next, - u32 current_pp, u32 next_pp) -{ - char direction; - if (next > current_idx) - direction = '>'; - else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx)) - direction = '<'; - else - direction = '~'; - - scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. 
pp:(%d-->%d)\n", - direction, - get_mali_freq(current_idx), - get_mali_freq(next), - utilization_gpu, - pmali_plat->dvfs_table[current_idx].downthreshold, - pmali_plat->dvfs_table[current_idx].upthreshold, - current_pp, next_pp); -} -#endif - -#ifndef CONFIG_MALI_DVFS -static int mali_stay_count = 0; -static void mali_decide_next_status(int utilization_pp, int* next_fs_idx, - int* pp_change_flag) -{ - u32 mali_up_limit, decided_fs_idx; - u32 ld_left, ld_right; - u32 ld_up, ld_down; - u32 change_mode; - - *pp_change_flag = 0; - change_mode = 0; - - scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n", - __LINE__, scaling_mode, MALI_TURBO_MODE, - pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk); - - mali_up_limit = (scaling_mode == MALI_TURBO_MODE) ? - pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk; - decided_fs_idx = currentStep; - - ld_up = pmali_plat->dvfs_table[currentStep].upthreshold; - ld_down = pmali_plat->dvfs_table[currentStep].downthreshold; - - scalingdbg(2, "utilization=%d, ld_up=%d\n ", utilization_pp, ld_up); - if (utilization_pp >= ld_up) { /* go up */ - - scalingdbg(2, "currentStep=%d, mali_up_limit=%d\n ", currentStep, mali_up_limit); - if (currentStep < mali_up_limit) { - change_mode = 1; - if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu)) - decided_fs_idx = pmali_plat->def_clock; - else - decided_fs_idx++; - } - if ((utilization_pp >= ld_up) && - (num_cores_enabled < pmali_plat->scale_info.maxpp)) { - if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) { - *pp_change_flag = 1; - change_mode = 1; - } else if (change_mode == 0) { - *pp_change_flag = 2; - change_mode = 1; - } - } -#if LOG_MALI_SCALING - scalingdbg(2, "[nexting..] 
[LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n", - utilization_pp, currentStep, mali_up_limit, decided_fs_idx, - num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode); -#endif - } else if (utilization_pp <= ld_down) { /* go down */ - if (mali_stay_count > 0) { - *next_fs_idx = decided_fs_idx; - mali_stay_count--; - return; - } - - if (num_cores_enabled > pmali_plat->sc_mpp) { - change_mode = 1; - if (utilization_pp <= ld_down) { - ld_left = utilization_pp * num_cores_enabled; - ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) * - (num_cores_enabled - 1); - if (ld_left < ld_right) { - change_mode = 2; - } - } - } else if (currentStep > pmali_plat->scale_info.minclk) { - change_mode = 1; - } else if (num_cores_enabled > 1) { /* decrease PPS */ - if (utilization_pp <= ld_down) { - ld_left = utilization_pp * num_cores_enabled; - ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) * - (num_cores_enabled - 1); - scalingdbg(2, "ld_left=%d, ld_right=%d\n", ld_left, ld_right); - if (ld_left < ld_right) { - change_mode = 2; - } - } - } - - if (change_mode == 1) { - decided_fs_idx--; - } else if (change_mode == 2) { /* decrease PPS */ - *pp_change_flag = -1; - } - } - - if (decided_fs_idx < 0 ) { - printk("gpu debug, next index below 0\n"); - decided_fs_idx = 0; - } - if (decided_fs_idx > pmali_plat->scale_info.maxclk) { - decided_fs_idx = pmali_plat->scale_info.maxclk; - printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx); - } - - if (change_mode) - mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count; - - *next_fs_idx = decided_fs_idx; -} -#endif - -void mali_pp_fs_scaling_update(int utilization_pp) -{ -#ifndef CONFIG_MALI_DVFS - int ret = 0; - int pp_change_flag = 0; - u32 next_idx = 0; - -#if LOG_MALI_SCALING - u32 last_pp = num_cores_enabled; -#endif - mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag); - - if (pp_change_flag == 1) - ret = enable_pp_cores(pmali_plat->sc_mpp); - else if (pp_change_flag == 2) - ret = enable_one_core(); - else if (pp_change_flag == -1) { - ret = disable_one_core(); - } - -#if LOG_MALI_SCALING - if (pp_change_flag || (next_idx != currentStep)) - trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled); -#endif - - if (next_idx != currentStep) { - ret = 1; - currentStep = next_idx; - } - - if (ret == 1) - schedule_work(&wq_work); -#ifdef CONFIG_MALI400_PROFILING - else - _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | - MALI_PROFILING_EVENT_CHANNEL_GPU | - MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, - get_current_frequency(), - 0, 0, 0, 0); -#endif -#endif -} - -u32 get_mali_schel_mode(void) -{ - return scaling_mode; -} - -void set_mali_schel_mode(u32 mode) -{ -#ifndef CONFIG_MALI_DVFS - if (mode >= MALI_SCALING_MODE_MAX) - return; - scaling_mode = mode; - - //disable thermal in turbo mode - if (scaling_mode == MALI_TURBO_MODE) { - pmali_plat->limit_on = 0; - } else { - pmali_plat->limit_on = 1; - } - /* set default performance range. 
*/ - pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock; - pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; - pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp; - pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp; - - /* set current status and tune max freq */ - if (scaling_mode == MALI_PP_FS_SCALING) { - pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; - enable_pp_cores(pmali_plat->sc_mpp); - } else if (scaling_mode == MALI_SCALING_DISABLE) { - pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; - enable_max_num_cores(); - } else if (scaling_mode == MALI_TURBO_MODE) { - pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock; - enable_max_num_cores(); - } - currentStep = pmali_plat->scale_info.maxclk; - schedule_work(&wq_work); -#endif -} - -u32 get_current_frequency(void) -{ - return get_mali_freq(currentStep); -} - -void mali_gpu_utilization_callback(int utilization_pp) -{ -#ifndef CONFIG_MALI_DVFS - if (mali_pm_statue) - return; - - switch (scaling_mode) { - case MALI_PP_FS_SCALING: - mali_pp_fs_scaling_update(utilization_pp); - break; - case MALI_PP_SCALING: - mali_pp_scaling_update(utilization_pp); - break; - default: - break; - } -#endif -} -static u32 clk_cntl_save = 0; -void mali_dev_freeze(void) -{ - clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL); -} - -void mali_dev_restore(void) -{ - - mplt_write(HHI_MALI_CLK_CNTL, clk_cntl_save); - if (pmali_plat && pmali_plat->pdev) { - mali_clock_init_clk_tree(pmali_plat->pdev); - } else { - printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n", - pmali_plat, pmali_plat == NULL ? NULL: pmali_plat->pdev); - } -} diff --git a/drivers/gpu/drm/bifrost/midgard/platform/meson/Kbuild b/drivers/gpu/drm/bifrost/midgard/platform/meson/Kbuild deleted file mode 100644 index d40d7982ff04..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/meson/Kbuild +++ /dev/null @@ -1,18 +0,0 @@ -# -# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. -# -# - - -mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o diff --git a/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_config_devicetree.c b/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_config_devicetree.c deleted file mode 100644 index 299d0e75fc03..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_config_devicetree.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - - - -#include - -int kbase_platform_early_init(void) -{ - /* Nothing needed at this stage */ - return 0; -} - -static struct kbase_platform_config dummy_platform_config; - -struct kbase_platform_config *kbase_get_platform_config(void) -{ - return &dummy_platform_config; -} - -#ifndef CONFIG_OF -int kbase_platform_register(void) -{ - return 0; -} - -void kbase_platform_unregister(void) -{ -} -#endif diff --git a/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_config_platform.h b/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_config_platform.h deleted file mode 100644 index 2ceca34945b9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_config_platform.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - -/** - * Maximum frequency GPU will be clocked at. Given in kHz. - * This must be specified as there is no default value. - * - * Attached value: number in kHz - * Default value: NA - */ -#define GPU_FREQ_KHZ_MAX (5000) -/** - * Minimum frequency GPU will be clocked at. Given in kHz. - * This must be specified as there is no default value. - * - * Attached value: number in kHz - * Default value: NA - */ -#define GPU_FREQ_KHZ_MIN (5000) - -/** - * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock - * - * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func - * for the function prototype. - * - * Attached value: A kbase_cpu_clk_speed_func. - * Default Value: NA - */ -#define CPU_SPEED_FUNC (NULL) - -/** - * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock - * - * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func - * for the function prototype. - * - * Attached value: A kbase_gpu_clk_speed_func. - * Default Value: NA - */ -#define GPU_SPEED_FUNC (NULL) - -/** - * Power management configuration - * - * Attached value: pointer to @ref kbase_pm_callback_conf - * Default value: See @ref kbase_pm_callback_conf - */ -#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) - -/** - * Platform specific configuration functions - * - * Attached value: pointer to @ref kbase_platform_funcs_conf - * Default value: See @ref kbase_platform_funcs_conf - */ -#define PLATFORM_FUNCS (NULL) - -extern struct kbase_pm_callback_conf pm_callbacks; - -/** - * Autosuspend delay - * - * The delay time (in milliseconds) to be used for autosuspend - */ -#define AUTO_SUSPEND_DELAY (100) diff --git a/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_runtime_pm.c b/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_runtime_pm.c deleted file mode 100644 index 49353977853a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/meson/mali_kbase_runtime_pm.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * - * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - -#include -#include -#include -#include "mali_kbase_config_platform.h" - -inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, - u32 util_gl_share, u32 util_cl_share[2]) -{ - return 1; -} - -static int pm_callback_power_on(struct kbase_device *kbdev) -{ - int ret = 1; /* Assume GPU has been powered off */ - int error; - - dev_dbg(kbdev->dev, "pm_callback_power_on %p\n", - (void *)kbdev->dev->pm_domain); - - error = pm_runtime_get_sync(kbdev->dev); - if (error == 1) { - /* - * Let core know that the chip has not been - * powered off, so we can save on re-initialization. - */ - ret = 0; - } - - dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); - - return ret; -} - -static void pm_callback_power_off(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "pm_callback_power_off\n"); - - pm_runtime_mark_last_busy(kbdev->dev); - pm_runtime_put_autosuspend(kbdev->dev); -} - -#ifdef KBASE_PM_RUNTIME -static int kbase_device_runtime_init(struct kbase_device *kbdev) -{ - int ret = 0; - - dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); - - pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); - pm_runtime_use_autosuspend(kbdev->dev); - - pm_runtime_set_active(kbdev->dev); - pm_runtime_enable(kbdev->dev); - - if (!pm_runtime_enabled(kbdev->dev)) { - dev_warn(kbdev->dev, "pm_runtime not enabled"); - ret = -ENOSYS; - } - - return ret; -} - -static void kbase_device_runtime_disable(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n"); - pm_runtime_disable(kbdev->dev); -} -#endif - -static int pm_callback_runtime_on(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "pm_callback_runtime_on\n"); - - return 0; -} - -static void pm_callback_runtime_off(struct kbase_device *kbdev) -{ - dev_dbg(kbdev->dev, "pm_callback_runtime_off\n"); -} - -static void pm_callback_resume(struct kbase_device *kbdev) -{ - int ret = pm_callback_runtime_on(kbdev); - - WARN_ON(ret); -} - -static void pm_callback_suspend(struct kbase_device *kbdev) -{ - pm_callback_runtime_off(kbdev); -} - -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = pm_callback_suspend, - .power_resume_callback = pm_callback_resume, -#ifdef KBASE_PM_RUNTIME - .power_runtime_init_callback = kbase_device_runtime_init, - .power_runtime_term_callback = kbase_device_runtime_disable, - .power_runtime_on_callback = pm_callback_runtime_on, - .power_runtime_off_callback = pm_callback_runtime_off, -#else /* KBASE_PM_RUNTIME */ - .power_runtime_init_callback = NULL, - .power_runtime_term_callback = NULL, - .power_runtime_on_callback = NULL, - .power_runtime_off_callback = NULL, -#endif /* KBASE_PM_RUNTIME */ -}; - - diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress/Kbuild b/drivers/gpu/drm/bifrost/midgard/platform/vexpress/Kbuild deleted file mode 100755 index 6780e4c9433b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress/Kbuild +++ /dev/null @@ -1,24 +0,0 @@ -# -# (C) 
COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ - mali_kbase_platform_fake.o diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/drm/bifrost/midgard/platform/vexpress/mali_kbase_config_platform.h deleted file mode 100755 index fac3cd52182f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress/mali_kbase_config_platform.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Power management configuration - * - * Attached value: pointer to @ref kbase_pm_callback_conf - * Default value: See @ref kbase_pm_callback_conf - */ -#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) - -/** - * Platform specific configuration functions - * - * Attached value: pointer to @ref kbase_platform_funcs_conf - * Default value: See @ref kbase_platform_funcs_conf - */ -#define PLATFORM_FUNCS (NULL) - -extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/drm/bifrost/midgard/platform/vexpress/mali_kbase_config_vexpress.c deleted file mode 100755 index d165ce262814..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress/mali_kbase_config_vexpress.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include -#include -#include -#include -#include "mali_kbase_config_platform.h" - -#ifndef CONFIG_OF -static struct kbase_io_resources io_resources = { - .job_irq_number = 68, - .mmu_irq_number = 69, - .gpu_irq_number = 70, - .io_memory_region = { - .start = 0xFC010000, - .end = 0xFC010000 + (4096 * 4) - 1 - } -}; -#endif /* CONFIG_OF */ - -static int pm_callback_power_on(struct kbase_device *kbdev) -{ - /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ - return 1; -} - -static void pm_callback_power_off(struct kbase_device *kbdev) -{ -} - -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = NULL, - .power_resume_callback = NULL -}; - -static struct kbase_platform_config versatile_platform_config = { -#ifndef CONFIG_OF - .io_resources = &io_resources -#endif -}; - -struct kbase_platform_config *kbase_get_platform_config(void) -{ - return &versatile_platform_config; -} diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/Kbuild deleted file mode 100755 index 51b408efd48a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/Kbuild +++ /dev/null @@ -1,24 +0,0 @@ -# -# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ - mali_kbase_platform_fake.o diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h deleted file mode 100755 index fac3cd52182f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Power management configuration - * - * Attached value: pointer to @ref kbase_pm_callback_conf - * Default value: See @ref kbase_pm_callback_conf - */ -#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) - -/** - * Platform specific configuration functions - * - * Attached value: pointer to @ref kbase_platform_funcs_conf - * Default value: See @ref kbase_platform_funcs_conf - */ -#define PLATFORM_FUNCS (NULL) - -extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c deleted file mode 100755 index efca0a5b3493..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include -#include - -#ifndef CONFIG_OF -static struct kbase_io_resources io_resources = { - .job_irq_number = 68, - .mmu_irq_number = 69, - .gpu_irq_number = 70, - .io_memory_region = { - .start = 0x2f010000, - .end = 0x2f010000 + (4096 * 4) - 1} -}; -#endif - -static int pm_callback_power_on(struct kbase_device *kbdev) -{ - /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ - return 1; -} - -static void pm_callback_power_off(struct kbase_device *kbdev) -{ -} - -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = NULL, - .power_resume_callback = NULL -}; - -static struct kbase_platform_config versatile_platform_config = { -#ifndef CONFIG_OF - .io_resources = &io_resources -#endif -}; - -struct kbase_platform_config *kbase_get_platform_config(void) -{ - return &versatile_platform_config; -} diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild deleted file mode 100755 index e07709c9b1a5..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild +++ /dev/null @@ -1,25 +0,0 @@ -# -# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. 
-# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ - mali_kbase_platform_fake.o diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h deleted file mode 100755 index fac3cd52182f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/** - * Power management configuration - * - * Attached value: pointer to @ref kbase_pm_callback_conf - * Default value: See @ref kbase_pm_callback_conf - */ -#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) - -/** - * Platform specific configuration functions - * - * Attached value: pointer to @ref kbase_platform_funcs_conf - * Default value: See @ref kbase_platform_funcs_conf - */ -#define PLATFORM_FUNCS (NULL) - -extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c deleted file mode 100755 index b6714b95b776..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -#include -#include -#include -#include - -#ifndef CONFIG_OF -static struct kbase_io_resources io_resources = { - .job_irq_number = 75, - .mmu_irq_number = 76, - .gpu_irq_number = 77, - .io_memory_region = { - .start = 0x2F000000, - .end = 0x2F000000 + (4096 * 4) - 1} -}; -#endif - -static int pm_callback_power_on(struct kbase_device *kbdev) -{ - /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ - return 1; -} - -static void pm_callback_power_off(struct kbase_device *kbdev) -{ -} - -struct kbase_pm_callback_conf pm_callbacks = { - .power_on_callback = pm_callback_power_on, - .power_off_callback = pm_callback_power_off, - .power_suspend_callback = NULL, - .power_resume_callback = NULL -}; - -static struct kbase_platform_config versatile_platform_config = { -#ifndef CONFIG_OF - .io_resources = &io_resources -#endif -}; - -struct kbase_platform_config *kbase_get_platform_config(void) -{ - return &versatile_platform_config; -} diff --git a/drivers/gpu/drm/bifrost/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/drm/bifrost/midgard/platform_dummy/mali_ukk_os.h deleted file mode 100755 index ef1ec708edef..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/platform_dummy/mali_ukk_os.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * - * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - - -/** - * @file mali_ukk_os.h - * Types and definitions that are common for Linux OSs for the kernel side of the - * User-Kernel interface. - */ - -#ifndef _UKK_OS_H_ /* Linux version */ -#define _UKK_OS_H_ - -#include - -/** - * @addtogroup uk_api User-Kernel Interface API - * @{ - */ - -/** - * @addtogroup uk_api_kernel UKK (Kernel side) - * @{ - */ - -/** - * Internal OS specific data structure associated with each UKK session. Part - * of a ukk_session object. - */ -typedef struct ukkp_session { - int dummy; /**< No internal OS specific data at this time */ -} ukkp_session; - -/** @} end group uk_api_kernel */ - -/** @} end group uk_api */ - -#endif /* _UKK_OS_H__ */ diff --git a/drivers/gpu/drm/bifrost/midgard/protected_mode_switcher.h b/drivers/gpu/drm/bifrost/midgard/protected_mode_switcher.h deleted file mode 100755 index 8778d812aea0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/protected_mode_switcher.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _PROTECTED_MODE_SWITCH_H_ -#define _PROTECTED_MODE_SWITCH_H_ - -struct protected_mode_device; - -/** - * struct protected_mode_ops - Callbacks for protected mode switch operations - * - * @protected_mode_enable: Callback to enable protected mode for device - * @protected_mode_disable: Callback to disable protected mode for device - */ -struct protected_mode_ops { - /** - * protected_mode_enable() - Enable protected mode on device - * @dev: The struct device - * - * Return: 0 on success, non-zero on error - */ - int (*protected_mode_enable)( - struct protected_mode_device *protected_dev); - - /** - * protected_mode_disable() - Disable protected mode on device, and - * reset device - * @dev: The struct device - * - * Return: 0 on success, non-zero on error - */ - int (*protected_mode_disable)( - struct protected_mode_device *protected_dev); -}; - -/** - * struct protected_mode_device - Device structure for protected mode devices - * - * @ops - Callbacks associated with this device - * @data - Pointer to device private data - * - * This structure should be registered with the platform device using - * platform_set_drvdata(). - */ -struct protected_mode_device { - struct protected_mode_ops ops; - void *data; -}; - -#endif /* _PROTECTED_MODE_SWITCH_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/sconscript b/drivers/gpu/drm/bifrost/midgard/sconscript deleted file mode 100755 index f9d9c1bb243f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/sconscript +++ /dev/null @@ -1,66 +0,0 @@ -# -# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -import sys -Import('env') - -SConscript( 'tests/sconscript' ) - -mock_test = 0 - -# Source files required for kbase. 
-kbase_src = [ - Glob('*.c'), - Glob('backend/*/*.c'), - Glob('internal/*/*.c'), - Glob('ipa/*.c'), - Glob('platform/%s/*.c' % env['platform_config']), - Glob('thirdparty/*.c'), -] - -if env['platform_config']=='juno_soc': - kbase_src += [Glob('platform/devicetree/*.c')] -else: - kbase_src += [Glob('platform/%s/*.c' % env['platform_config'])] - -if Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock') and env['unit'] == '1': - kbase_src += [Glob('#kernel/drivers/gpu/arm/midgard/tests/internal/src/mock/*.c')] - mock_test = 1 - -make_args = env.kernel_get_config_defines(ret_list = True) + [ - 'PLATFORM=%s' % env['platform'], - 'MALI_KERNEL_TEST_API=%s' % env['debug'], - 'MALI_UNIT_TEST=%s' % env['unit'], - 'MALI_RELEASE_NAME=%s' % env['mali_release_name'], - 'MALI_CUSTOMER_RELEASE=%s' % env['release'], - 'MALI_USE_CSF=%s' % env['csf'], - 'MALI_COVERAGE=%s' % env['coverage'], -] - -kbase = env.BuildKernelModule('$STATIC_LIB_PATH/mali_kbase.ko', kbase_src, - make_args = make_args) - -if 'smc_protected_mode_switcher' in env: - env.Depends('$STATIC_LIB_PATH/mali_kbase.ko', '$STATIC_LIB_PATH/smc_protected_mode_switcher.ko') - -env.KernelObjTarget('kbase', kbase) - -env.AppendUnique(BASE=['cutils_linked_list']) diff --git a/drivers/gpu/drm/bifrost/midgard/tests/Kbuild b/drivers/gpu/drm/bifrost/midgard/tests/Kbuild deleted file mode 100755 index df16a77a7f66..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/Kbuild +++ /dev/null @@ -1,23 +0,0 @@ -# -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -obj-$(CONFIG_MALI_KUTF) += kutf/ -obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/Kconfig b/drivers/gpu/drm/bifrost/midgard/tests/Kconfig deleted file mode 100755 index fa91aea4ac5c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. 
-# -# SPDX-License-Identifier: GPL-2.0 -# -# - -source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" -source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" diff --git a/drivers/gpu/drm/bifrost/midgard/tests/Mconfig b/drivers/gpu/drm/bifrost/midgard/tests/Mconfig deleted file mode 100755 index af4e383badb3..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/Mconfig +++ /dev/null @@ -1,32 +0,0 @@ -# -# (C) COPYRIGHT 2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. -# - -config UNIT_TEST_KERNEL_MODULES - bool - default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES - default n - -config BUILD_IPA_TESTS - bool - default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ - default n - -config BUILD_IPA_UNIT_TESTS - bool - default y if NO_MALI && BUILD_IPA_TESTS - default n - -config BUILD_CSF_TESTS - bool - default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF - default n diff --git a/drivers/gpu/drm/bifrost/midgard/tests/build.bp b/drivers/gpu/drm/bifrost/midgard/tests/build.bp deleted file mode 100755 index a0823c7c0c64..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/build.bp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- - */ - -bob_defaults { - name: "kernel_test_module_defaults", - defaults: ["mali_kbase_shared_config_defaults"], - include_dirs: [ - "kernel/drivers/gpu/arm", - "kernel/drivers/gpu/arm/midgard", - "kernel/drivers/gpu/arm/midgard/backend/gpu", - "kernel/drivers/gpu/arm/midgard/tests/include", - ], -} diff --git a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_helpers.h b/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_helpers.h deleted file mode 100755 index 15e168c2385b..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_helpers.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KERNEL_UTF_HELPERS_H_ -#define _KERNEL_UTF_HELPERS_H_ - -/* kutf_helpers.h - * Test helper functions for the kernel UTF test infrastructure. - * - * These functions provide methods for enqueuing/dequeuing lines of text sent - * by user space. They are used to implement the transfer of "userdata" from - * user space to kernel. - */ - -#include - -/** - * kutf_helper_input_dequeue() - Dequeue a line sent by user space - * @context: KUTF context - * @str_size: Pointer to an integer to receive the size of the string - * - * If no line is available then this function will wait (interruptibly) until - * a line is available. - * - * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end - * of data. - */ -char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size); - -/** - * kutf_helper_input_enqueue() - Enqueue a line sent by user space - * @context: KUTF context - * @str: The user space address of the line - * @size: The length in bytes of the string - * - * This function will use copy_from_user to copy the string out of user space. - * The string need not be NULL-terminated (@size should not include the NULL - * termination). - * - * As a special case @str==NULL and @size==0 is valid to mark the end of input, - * but callers should use kutf_helper_input_enqueue_end_of_data() instead. - * - * Return: 0 on success, -EFAULT if the line cannot be copied from user space, - * -ENOMEM if out of memory. - */ -int kutf_helper_input_enqueue(struct kutf_context *context, - const char __user *str, size_t size); - -/** - * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent - * @context: KUTF context - * - * After this function has been called, kutf_helper_input_dequeue() will always - * return NULL. - */ -void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); - -#endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_helpers_user.h deleted file mode 100755 index 3b1300e1ce6f..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_helpers_user.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KERNEL_UTF_HELPERS_USER_H_ -#define _KERNEL_UTF_HELPERS_USER_H_ - -/* kutf_helpers.h - * Test helper functions for the kernel UTF test infrastructure, whose - * implementation mirrors that of similar functions for kutf-userside - */ - -#include -#include - - -#define KUTF_HELPER_MAX_VAL_NAME_LEN 255 - -enum kutf_helper_valtype { - KUTF_HELPER_VALTYPE_INVALID, - KUTF_HELPER_VALTYPE_U64, - KUTF_HELPER_VALTYPE_STR, - - KUTF_HELPER_VALTYPE_COUNT /* Must be last */ -}; - -struct kutf_helper_named_val { - enum kutf_helper_valtype type; - char *val_name; - union { - u64 val_u64; - char *val_str; - } u; -}; - -/* Extra error values for certain helpers when we want to distinguish between - * Linux's own error values too. - * - * These can only be used on certain functions returning an int type that are - * documented as returning one of these potential values, they cannot be used - * from functions return a ptr type, since we can't decode it with PTR_ERR - * - * No negative values are used - Linux error codes should be used instead, and - * indicate a problem in accessing the data file itself (are generally - * unrecoverable) - * - * Positive values indicate correct access but invalid parsing (can be - * recovered from assuming data in the future is correct) */ -enum kutf_helper_err { - /* No error - must be zero */ - KUTF_HELPER_ERR_NONE = 0, - /* Named value parsing encountered an invalid name */ - KUTF_HELPER_ERR_INVALID_NAME, - /* Named value parsing of string or u64 type encountered extra - * characters after the value (after the last digit for a u64 type or - * after the string end delimiter for string type) */ - KUTF_HELPER_ERR_CHARS_AFTER_VAL, - /* Named value parsing of string type couldn't find the string end - * delimiter. - * - * This cannot be encountered when the NAME="value" message exceeds the - * textbuf's maximum line length, because such messages are not checked - * for an end string delimiter */ - KUTF_HELPER_ERR_NO_END_DELIMITER, - /* Named value didn't parse as any of the known types */ - KUTF_HELPER_ERR_INVALID_VALUE, -}; - - -/* Send named NAME=value pair, u64 value - * - * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long - * - * Any failure will be logged on the suite's current test fixture - * - * Returns 0 on success, non-zero on failure - */ -int kutf_helper_send_named_u64(struct kutf_context *context, - const char *val_name, u64 val); - -/* Get the maximum length of a string that can be represented as a particular - * NAME="value" pair without string-value truncation in the kernel's buffer - * - * Given val_name and the kernel buffer's size, this can be used to determine - * the maximum length of a string that can be sent as val_name="value" pair - * without having the string value truncated. Any string longer than this will - * be truncated at some point during communication to this size. - * - * It is assumed that val_name is a valid name for - * kutf_helper_send_named_str(), and no checking will be made to - * ensure this. - * - * Returns the maximum string length that can be represented, or a negative - * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz - */ -int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); - -/* Send named NAME="str" pair - * - * no escaping allowed in str. 
Any of the following characters will terminate - * the string: '"' '\\' '\n' - * - * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long - * - * Any failure will be logged on the suite's current test fixture - * - * Returns 0 on success, non-zero on failure */ -int kutf_helper_send_named_str(struct kutf_context *context, - const char *val_name, const char *val_str); - -/* Receive named NAME=value pair - * - * This can receive u64 and string values - check named_val->type - * - * If you are not planning on dynamic handling of the named value's name and - * type, then kutf_helper_receive_check_val() is more useful as a - * convenience function. - * - * String members of named_val will come from memory allocated on the fixture's mempool - * - * Returns 0 on success. Negative value on failure to receive from the 'run' - * file, positive value indicates an enum kutf_helper_err value for correct - * reception of data but invalid parsing */ -int kutf_helper_receive_named_val( - struct kutf_context *context, - struct kutf_helper_named_val *named_val); - -/* Receive and validate NAME=value pair - * - * As with kutf_helper_receive_named_val, but validate that the - * name and type are as expected, as a convenience for a common pattern found - * in tests. - * - * NOTE: this only returns an error value if there was actually a problem - * receiving data. - * - * NOTE: If the underlying data was received correctly, but: - * - isn't of the expected name - * - isn't the expected type - * - isn't correctly parsed for the type - * then the following happens: - * - failure result is recorded - * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID - * - named_val->u will contain some default value that should be relatively - * harmless for the test, including being writable in the case of string - * values - * - return value will be 0 to indicate success - * - * The rationale behind this is that we'd prefer to continue the rest of the - * test with failures propagated, rather than hitting a timeout */ -int kutf_helper_receive_check_val( - struct kutf_helper_named_val *named_val, - struct kutf_context *context, - const char *expect_val_name, - enum kutf_helper_valtype expect_val_type); - -/* Output a named value to kmsg */ -void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val); - - -#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_mem.h b/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_mem.h deleted file mode 100755 index 988559de1edf..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_mem.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KERNEL_UTF_MEM_H_ -#define _KERNEL_UTF_MEM_H_ - -/* kutf_mem.h - * Functions for management of memory pools in the kernel. - * - * This module implements a memory pool allocator, allowing a test - * implementation to allocate linked allocations which can then be freed by a - * single free which releases all of the resources held by the entire pool. - * - * Note that it is not possible to free single resources within the pool once - * allocated. - */ - -#include -#include - -/** - * struct kutf_mempool - the memory pool context management structure - * @head: list head on which the allocations in this context are added to - * @lock: mutex for concurrent allocation from multiple threads - * - */ -struct kutf_mempool { - struct list_head head; - struct mutex lock; -}; - -/** - * kutf_mempool_init() - Initialize a memory pool. - * @pool: Memory pool structure to initialize, provided by the user - * - * Return: zero on success - */ -int kutf_mempool_init(struct kutf_mempool *pool); - -/** - * kutf_mempool_alloc() - Allocate memory from a pool - * @pool: Memory pool to allocate from - * @size: Size of memory wanted in number of bytes - * - * Return: Pointer to memory on success, NULL on failure. - */ -void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); - -/** - * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. - * @pool: The memory pool to free - */ -void kutf_mempool_destroy(struct kutf_mempool *pool); -#endif /* _KERNEL_UTF_MEM_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_resultset.h b/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_resultset.h deleted file mode 100755 index 49ebeb4ec546..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_resultset.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KERNEL_UTF_RESULTSET_H_ -#define _KERNEL_UTF_RESULTSET_H_ - -/* kutf_resultset.h - * Functions and structures for handling test results and result sets. - * - * This section of the kernel UTF contains structures and functions used for the - * management of Results and Result Sets. - */ - -/** - * enum kutf_result_status - Status values for a single Test error. - * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark - * results. - * @KUTF_RESULT_SKIP: The test was skipped. - * @KUTF_RESULT_UNKNOWN: The test has an unknown result. - * @KUTF_RESULT_PASS: The test result passed. - * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug - * message. - * @KUTF_RESULT_INFO: The test result passed, but raised - * an informative message. - * @KUTF_RESULT_WARN: The test result passed, but raised a warning - * message. 
- * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. - * @KUTF_RESULT_FATAL: The test result failed with a fatal error. - * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF - * assertion failure. - * @KUTF_RESULT_USERDATA: User data is ready to be read, - * this is not seen outside the kernel - * @KUTF_RESULT_USERDATA_WAIT: Waiting for user data to be sent, - * this is not seen outside the kernel - * @KUTF_RESULT_TEST_FINISHED: The test has finished, no more results will - * be produced. This is not seen outside kutf - */ -enum kutf_result_status { - KUTF_RESULT_BENCHMARK = -3, - KUTF_RESULT_SKIP = -2, - KUTF_RESULT_UNKNOWN = -1, - - KUTF_RESULT_PASS = 0, - KUTF_RESULT_DEBUG = 1, - KUTF_RESULT_INFO = 2, - KUTF_RESULT_WARN = 3, - KUTF_RESULT_FAIL = 4, - KUTF_RESULT_FATAL = 5, - KUTF_RESULT_ABORT = 6, - - KUTF_RESULT_USERDATA = 7, - KUTF_RESULT_USERDATA_WAIT = 8, - KUTF_RESULT_TEST_FINISHED = 9 -}; - -/* The maximum size of a kutf_result_status result when - * converted to a string - */ -#define KUTF_ERROR_MAX_NAME_SIZE 21 - -#ifdef __KERNEL__ - -#include -#include - -struct kutf_context; - -/** - * struct kutf_result - Represents a single test result. - * @node: Next result in the list of results. - * @status: The status summary (pass / warn / fail / etc). - * @message: A more verbose status message. - */ -struct kutf_result { - struct list_head node; - enum kutf_result_status status; - const char *message; -}; - -/** - * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data - * - * This flag is set within a struct kutf_result_set whenever the test is blocked - * waiting for user data. Attempts to dequeue results when this flag is set - * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This - * is used to output a warning message and end of file. - */ -#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1 - -/** - * struct kutf_result_set - Represents a set of results. - * @results: List head of a struct kutf_result list for storing the results - * @waitq: Wait queue signalled whenever new results are added. - * @flags: Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT - */ -struct kutf_result_set { - struct list_head results; - wait_queue_head_t waitq; - int flags; -}; - -/** - * kutf_create_result_set() - Create a new result set - * to which results can be added. - * - * Return: The created result set. - */ -struct kutf_result_set *kutf_create_result_set(void); - -/** - * kutf_add_result() - Add a result to the end of an existing result set. - * - * @context: The kutf context - * @status: The result status to add. - * @message: The result message to add. - * - * Return: 0 if the result is successfully added. -ENOMEM if allocation fails. - */ -int kutf_add_result(struct kutf_context *context, - enum kutf_result_status status, const char *message); - -/** - * kutf_remove_result() - Remove a result from the head of a result set. - * @set: The result set. - * - * This function will block until there is a result to read. The wait is - * interruptible, so this function will return with an ERR_PTR if interrupted. - * - * Return: result or ERR_PTR if interrupted - */ -struct kutf_result *kutf_remove_result( - struct kutf_result_set *set); - -/** - * kutf_destroy_result_set() - Free a previously created result set. - * - * @results: The result set whose resources to free. 
- */ -void kutf_destroy_result_set(struct kutf_result_set *results); - -/** - * kutf_set_waiting_for_input() - The test is waiting for userdata - * - * @set: The result set to update - * - * Causes the result set to always have results and return a fake - * %KUTF_RESULT_USERDATA_WAIT result. - */ -void kutf_set_waiting_for_input(struct kutf_result_set *set); - -/** - * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata - * - * @set: The result set to update - * - * Cancels the effect of kutf_set_waiting_for_input() - */ -void kutf_clear_waiting_for_input(struct kutf_result_set *set); - -#endif /* __KERNEL__ */ - -#endif /* _KERNEL_UTF_RESULTSET_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_suite.h b/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_suite.h deleted file mode 100755 index 8d75f506f9eb..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_suite.h +++ /dev/null @@ -1,569 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KERNEL_UTF_SUITE_H_ -#define _KERNEL_UTF_SUITE_H_ - -/* kutf_suite.h - * Functions for management of test suites. - * - * This collection of data structures, macros, and functions are used to - * create Test Suites, Tests within those Test Suites, and Fixture variants - * of each test. - */ - -#include -#include -#include - -#include -#include - -/* Arbitrary maximum size to prevent user space allocating too much kernel - * memory - */ -#define KUTF_MAX_LINE_LENGTH (1024u) - -/** - * Pseudo-flag indicating an absence of any specified test class. Note that - * tests should not be annotated with this constant as it is simply a zero - * value; tests without a more specific class must be marked with the flag - * KUTF_F_TEST_GENERIC. - */ -#define KUTF_F_TEST_NONE ((unsigned int)(0)) - -/** - * Class indicating this test is a smoke test. - * A given set of smoke tests should be quick to run, enabling rapid turn-around - * of "regress-on-commit" test runs. - */ -#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) - -/** - * Class indicating this test is a performance test. - * These tests typically produce a performance metric, such as "time to run" or - * "frames per second", - */ -#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) - -/** - * Class indicating that this test is a deprecated test. - * These tests have typically been replaced by an alternative test which is - * more efficient, or has better coverage. - */ -#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) - -/** - * Class indicating that this test is a known failure. - * These tests have typically been run and failed, but marking them as a known - * failure means it is easier to triage results. 
- * - * It is typically more convenient to triage known failures using the - * results database and web UI, as this means there is no need to modify the - * test code. - */ -#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) - -/** - * Class indicating that this test is a generic test, which is not a member of - * a more specific test class. Tests which are not created with a specific set - * of filter flags by the user are assigned this test class by default. - */ -#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) - -/** - * Class indicating this test is a resource allocation failure test. - * A resource allocation failure test will test that an error code is - * correctly propagated when an allocation fails. - */ -#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) - -/** - * Additional flag indicating that this test is an expected failure when - * run in resource failure mode. These tests are never run when running - * the low resource mode. - */ -#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) - -/** - * Flag reserved for user-defined filter zero. - */ -#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) - -/** - * Flag reserved for user-defined filter one. - */ -#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) - -/** - * Flag reserved for user-defined filter two. - */ -#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) - -/** - * Flag reserved for user-defined filter three. - */ -#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) - -/** - * Flag reserved for user-defined filter four. - */ -#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) - -/** - * Flag reserved for user-defined filter five. - */ -#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) - -/** - * Flag reserved for user-defined filter six. - */ -#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) - -/** - * Flag reserved for user-defined filter seven. - */ -#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31)) - -/** - * Pseudo-flag indicating that all test classes should be executed. - */ -#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) - -/** - * union kutf_callback_data - Union used to store test callback data - * @ptr_value: pointer to the location where test callback data - * are stored - * @u32_value: a number which represents test callback data - */ -union kutf_callback_data { - void *ptr_value; - u32 u32_value; -}; - -/** - * struct kutf_userdata_line - A line of user data to be returned to the user - * @node: struct list_head to link this into a list - * @str: The line of user data to return to user space - * @size: The number of bytes within @str - */ -struct kutf_userdata_line { - struct list_head node; - char *str; - size_t size; -}; - -/** - * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output - * - * If user space reads the "run" file while the test is waiting for user data, - * then the framework will output a warning message and set this flag within - * struct kutf_userdata. A subsequent read will then simply return an end of - * file condition rather than outputting the warning again. The upshot of this - * is that simply running 'cat' on a test which requires user data will produce - * the warning followed by 'cat' exiting due to EOF - which is much more user - * friendly than blocking indefinitely waiting for user data. 
- */ -#define KUTF_USERDATA_WARNING_OUTPUT 1 - -/** - * struct kutf_userdata - Structure holding user data - * @flags: See %KUTF_USERDATA_WARNING_OUTPUT - * @input_head: List of struct kutf_userdata_line containing user data - * to be read by the kernel space test. - * @input_waitq: Wait queue signalled when there is new user data to be - * read by the kernel space test. - */ -struct kutf_userdata { - unsigned long flags; - struct list_head input_head; - wait_queue_head_t input_waitq; -}; - -/** - * struct kutf_context - Structure representing a kernel test context - * @kref: Refcount for number of users of this context - * @suite: Convenience pointer to the suite this context - * is running - * @test_fix: The fixture that is being run in this context - * @fixture_pool: The memory pool used for the duration of - * the fixture/text context. - * @fixture: The user provided fixture structure. - * @fixture_index: The index (id) of the current fixture. - * @fixture_name: The name of the current fixture (or NULL if unnamed). - * @test_data: Any user private data associated with this test - * @result_set: All the results logged by this test context - * @status: The status of the currently running fixture. - * @expected_status: The expected status on exist of the currently - * running fixture. - * @work: Work item to enqueue onto the work queue to run the test - * @userdata: Structure containing the user data for the test to read - */ -struct kutf_context { - struct kref kref; - struct kutf_suite *suite; - struct kutf_test_fixture *test_fix; - struct kutf_mempool fixture_pool; - void *fixture; - unsigned int fixture_index; - const char *fixture_name; - union kutf_callback_data test_data; - struct kutf_result_set *result_set; - enum kutf_result_status status; - enum kutf_result_status expected_status; - - struct work_struct work; - struct kutf_userdata userdata; -}; - -/** - * struct kutf_suite - Structure representing a kernel test suite - * @app: The application this suite belongs to. - * @name: The name of this suite. - * @suite_data: Any user private data associated with this - * suite. - * @create_fixture: Function used to create a new fixture instance - * @remove_fixture: Function used to destroy a new fixture instance - * @fixture_variants: The number of variants (must be at least 1). - * @suite_default_flags: Suite global filter flags which are set on - * all tests. - * @node: List node for suite_list - * @dir: The debugfs directory for this suite - * @test_list: List head to store all the tests which are - * part of this suite - */ -struct kutf_suite { - struct kutf_application *app; - const char *name; - union kutf_callback_data suite_data; - void *(*create_fixture)(struct kutf_context *context); - void (*remove_fixture)(struct kutf_context *context); - unsigned int fixture_variants; - unsigned int suite_default_flags; - struct list_head node; - struct dentry *dir; - struct list_head test_list; -}; - -/* ============================================================================ - Application functions -============================================================================ */ - -/** - * kutf_create_application() - Create an in kernel test application. - * @name: The name of the test application. - * - * Return: pointer to the kutf_application on success or NULL - * on failure - */ -struct kutf_application *kutf_create_application(const char *name); - -/** - * kutf_destroy_application() - Destroy an in kernel test application. - * - * @app: The test application to destroy. 
- */ -void kutf_destroy_application(struct kutf_application *app); - -/* ============================================================================ - Suite functions -============================================================================ */ - -/** - * kutf_create_suite() - Create a kernel test suite. - * @app: The test application to create the suite in. - * @name: The name of the suite. - * @fixture_count: The number of fixtures to run over the test - * functions in this suite - * @create_fixture: Callback used to create a fixture. The returned value - * is stored in the fixture pointer in the context for - * use in the test functions. - * @remove_fixture: Callback used to remove a previously created fixture. - * - * Suite names must be unique. Should two suites with the same name be - * registered with the same application then this function will fail, if they - * are registered with different applications then the function will not detect - * this and the call will succeed. - * - * Return: pointer to the created kutf_suite on success or NULL - * on failure - */ -struct kutf_suite *kutf_create_suite( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context)); - -/** - * kutf_create_suite_with_filters() - Create a kernel test suite with user - * defined default filters. - * @app: The test application to create the suite in. - * @name: The name of the suite. - * @fixture_count: The number of fixtures to run over the test - * functions in this suite - * @create_fixture: Callback used to create a fixture. The returned value - * is stored in the fixture pointer in the context for - * use in the test functions. - * @remove_fixture: Callback used to remove a previously created fixture. - * @filters: Filters to apply to a test if it doesn't provide its own - * - * Suite names must be unique. Should two suites with the same name be - * registered with the same application then this function will fail, if they - * are registered with different applications then the function will not detect - * this and the call will succeed. - * - * Return: pointer to the created kutf_suite on success or NULL on failure - */ -struct kutf_suite *kutf_create_suite_with_filters( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters); - -/** - * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with - * user defined default filters. - * @app: The test application to create the suite in. - * @name: The name of the suite. - * @fixture_count: The number of fixtures to run over the test - * functions in this suite - * @create_fixture: Callback used to create a fixture. The returned value - * is stored in the fixture pointer in the context for - * use in the test functions. - * @remove_fixture: Callback used to remove a previously created fixture. 
- * @filters: Filters to apply to a test if it doesn't provide its own - * @suite_data: Suite specific callback data, provided during the - * running of the test in the kutf_context - * - * Return: pointer to the created kutf_suite on success or NULL - * on failure - */ -struct kutf_suite *kutf_create_suite_with_filters_and_data( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data suite_data); - -/** - * kutf_add_test() - Add a test to a kernel test suite. - * @suite: The suite to add the test to. - * @id: The ID of the test. - * @name: The name of the test. - * @execute: Callback to the test function to run. - * - * Note: As no filters are provided the test will use the suite filters instead - */ -void kutf_add_test(struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context)); - -/** - * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters - * @suite: The suite to add the test to. - * @id: The ID of the test. - * @name: The name of the test. - * @execute: Callback to the test function to run. - * @filters: A set of filtering flags, assigning test categories. - */ -void kutf_add_test_with_filters(struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters); - -/** - * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite - * with filters. - * @suite: The suite to add the test to. - * @id: The ID of the test. - * @name: The name of the test. - * @execute: Callback to the test function to run. - * @filters: A set of filtering flags, assigning test categories. - * @test_data: Test specific callback data, provided during the - * running of the test in the kutf_context - */ -void kutf_add_test_with_filters_and_data( - struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data test_data); - - -/* ============================================================================ - Test functions -============================================================================ */ -/** - * kutf_test_log_result_external() - Log a result which has been created - * externally into a in a standard form - * recognized by the log parser. - * @context: The test context the test is running in - * @message: The message for this result - * @new_status: The result status of this log message - */ -void kutf_test_log_result_external( - struct kutf_context *context, - const char *message, - enum kutf_result_status new_status); - -/** - * kutf_test_expect_abort() - Tell the kernel that you expect the current - * fixture to produce an abort. - * @context: The test context this test is running in. - */ -void kutf_test_expect_abort(struct kutf_context *context); - -/** - * kutf_test_expect_fatal() - Tell the kernel that you expect the current - * fixture to produce a fatal error. - * @context: The test context this test is running in. - */ -void kutf_test_expect_fatal(struct kutf_context *context); - -/** - * kutf_test_expect_fail() - Tell the kernel that you expect the current - * fixture to fail. - * @context: The test context this test is running in. 
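
For context, the application/suite/test registration interface declared above (kutf_create_application(), kutf_create_suite() and the kutf_add_test*() variants) was normally driven from a test module's init/exit hooks. The following is only a minimal sketch using the signatures shown in this header; the module, suite and test names are illustrative, and the <kutf/kutf_suite.h> / <kutf/kutf_mem.h> include paths are assumed from the tests/include layout in this tree:

/* Illustrative KUTF test module; include paths are assumed. */
#include <linux/module.h>
#include <kutf/kutf_suite.h>
#include <kutf/kutf_mem.h>

static struct kutf_application *example_app;

/* Fixture callback: allocate per-fixture state from the context's pool */
static void *example_create_fixture(struct kutf_context *context)
{
        return kutf_mempool_alloc(&context->fixture_pool, 16);
}

/* Test body: report a result against the running context */
static void example_simple_test(struct kutf_context *context)
{
        kutf_test_pass(context, "example test passed");
}

static int __init example_kutf_init(void)
{
        struct kutf_suite *suite;

        example_app = kutf_create_application("example_app");
        if (!example_app)
                return -ENOMEM;

        /* One fixture variant; no remove callback needed for this sketch */
        suite = kutf_create_suite(example_app, "example_suite", 1,
                                  example_create_fixture, NULL);
        if (!suite) {
                kutf_destroy_application(example_app);
                return -ENOMEM;
        }

        kutf_add_test(suite, 0x0, "simple", example_simple_test);
        return 0;
}

static void __exit example_kutf_exit(void)
{
        kutf_destroy_application(example_app);
}

module_init(example_kutf_init);
module_exit(example_kutf_exit);
MODULE_LICENSE("GPL");
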
- */ -void kutf_test_expect_fail(struct kutf_context *context); - -/** - * kutf_test_expect_warn() - Tell the kernel that you expect the current - * fixture to produce a warning. - * @context: The test context this test is running in. - */ -void kutf_test_expect_warn(struct kutf_context *context); - -/** - * kutf_test_expect_pass() - Tell the kernel that you expect the current - * fixture to pass. - * @context: The test context this test is running in. - */ -void kutf_test_expect_pass(struct kutf_context *context); - -/** - * kutf_test_skip() - Tell the kernel that the test should be skipped. - * @context: The test context this test is running in. - */ -void kutf_test_skip(struct kutf_context *context); - -/** - * kutf_test_skip_msg() - Tell the kernel that this test has been skipped, - * supplying a reason string. - * @context: The test context this test is running in. - * @message: A message string containing the reason for the skip. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a prebaked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. - */ -void kutf_test_skip_msg(struct kutf_context *context, const char *message); - -/** - * kutf_test_pass() - Tell the kernel that this test has passed. - * @context: The test context this test is running in. - * @message: A message string containing the reason for the pass. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a pre-baked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. - */ -void kutf_test_pass(struct kutf_context *context, char const *message); - -/** - * kutf_test_debug() - Send a debug message - * @context: The test context this test is running in. - * @message: A message string containing the debug information. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a pre-baked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. - */ -void kutf_test_debug(struct kutf_context *context, char const *message); - -/** - * kutf_test_info() - Send an information message - * @context: The test context this test is running in. - * @message: A message string containing the information message. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a pre-baked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. - */ -void kutf_test_info(struct kutf_context *context, char const *message); - -/** - * kutf_test_warn() - Send a warning message - * @context: The test context this test is running in. - * @message: A message string containing the warning message. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a pre-baked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. 
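
As the notes above stress, a result message must stay valid for the whole test run, so dynamic strings are built with kutf_dsprintf() (declared in kutf_utils.h below) from the fixture pool rather than on the stack. A short sketch of a test body following that rule; the checked value and messages are illustrative:

/* Illustrative test body; assumes <kutf/kutf_suite.h> and
 * <kutf/kutf_utils.h> on the include path. */
static void example_counter_test(struct kutf_context *context)
{
        unsigned int observed = 42;     /* stand-in for a real measurement */
        const char *msg;

        if (!context->fixture) {
                kutf_test_skip_msg(context, "fixture not available");
                return;
        }

        /* The returned string lives in the fixture pool, so it remains
         * valid until the result set is destroyed. */
        msg = kutf_dsprintf(&context->fixture_pool,
                            "observed counter value %u", observed);
        if (observed == 42)
                kutf_test_pass(context, msg);
        else
                kutf_test_fail(context, msg);
}
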
- */ -void kutf_test_warn(struct kutf_context *context, char const *message); - -/** - * kutf_test_fail() - Tell the kernel that a test has failed - * @context: The test context this test is running in. - * @message: A message string containing the failure message. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a pre-baked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. - */ -void kutf_test_fail(struct kutf_context *context, char const *message); - -/** - * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error - * @context: The test context this test is running in. - * @message: A message string containing the fatal error message. - * - * Note: The message must not be freed during the lifetime of the test run. - * This means it should either be a pre-baked string, or if a dynamic string - * is required it must be created with kutf_dsprintf which will store - * the resultant string in a buffer who's lifetime is the same as the test run. - */ -void kutf_test_fatal(struct kutf_context *context, char const *message); - -/** - * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test - * - * @context: The test context this test is running in. - */ -void kutf_test_abort(struct kutf_context *context); - -#endif /* _KERNEL_UTF_SUITE_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_utils.h b/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_utils.h deleted file mode 100755 index 25b8285500d7..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/include/kutf/kutf_utils.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#ifndef _KERNEL_UTF_UTILS_H_ -#define _KERNEL_UTF_UTILS_H_ - -/* kutf_utils.h - * Utilities for the kernel UTF test infrastructure. - * - * This collection of library functions are provided for use by kernel UTF - * and users of kernel UTF which don't directly fit within the other - * code modules. - */ - -#include - -/** - * Maximum size of the message strings within kernel UTF, messages longer then - * this will be truncated. - */ -#define KUTF_MAX_DSPRINTF_LEN 1024 - -/** - * kutf_dsprintf() - dynamic sprintf - * @pool: memory pool to allocate from - * @fmt: The format string describing the string to document. - * @... The parameters to feed in to the format string. - * - * This function implements sprintf which dynamically allocates memory to store - * the string. The library will free the memory containing the string when the - * result set is cleared or destroyed. 
- * - * Note The returned string may be truncated to fit an internal temporary - * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. - * - * Return: Returns pointer to allocated string, or NULL on error. - */ -const char *kutf_dsprintf(struct kutf_mempool *pool, - const char *fmt, ...); - -#endif /* _KERNEL_UTF_UTILS_H_ */ diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/Kbuild b/drivers/gpu/drm/bifrost/midgard/tests/kutf/Kbuild deleted file mode 100755 index 2531d41ca28d..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/Kbuild +++ /dev/null @@ -1,26 +0,0 @@ -# -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -ccflags-y += -I$(src)/../include - -obj-$(CONFIG_MALI_KUTF) += kutf.o - -kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/Kconfig b/drivers/gpu/drm/bifrost/midgard/tests/kutf/Kconfig deleted file mode 100755 index 0cdb474c06a3..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/Kconfig +++ /dev/null @@ -1,28 +0,0 @@ -# -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - - -config MALI_KUTF - tristate "Mali Kernel Unit Test Framework" - default m - help - Enables MALI testing framework. To compile it as a module, - choose M here - this will generate a single module called kutf. diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/Makefile b/drivers/gpu/drm/bifrost/midgard/tests/kutf/Makefile deleted file mode 100755 index d848e8774bd0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -# -# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -# linux build system bootstrap for out-of-tree module - -# default to building for the host -ARCH ?= $(shell uname -m) - -ifeq ($(KDIR),) -$(error Must specify KDIR to point to the kernel to target)) -endif - -all: - $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules - -clean: - $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/build.bp b/drivers/gpu/drm/bifrost/midgard/tests/kutf/build.bp deleted file mode 100755 index 960c8faa8df9..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/build.bp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- - */ - -bob_kernel_module { - name: "kutf", - defaults: ["kernel_defaults"], - srcs: [ - "Kbuild", - "kutf_helpers.c", - "kutf_helpers_user.c", - "kutf_mem.c", - "kutf_resultset.c", - "kutf_suite.c", - "kutf_utils.c", - ], - kbuild_options: ["CONFIG_MALI_KUTF=m"], - include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"], - enabled: false, - base_build_kutf: { - enabled: true, - }, -} diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_helpers.c b/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_helpers.c deleted file mode 100755 index cab5add6d93c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_helpers.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* Kernel UTF test helpers */ -#include - -#include -#include -#include -#include -#include -#include - -static DEFINE_SPINLOCK(kutf_input_lock); - -static bool pending_input(struct kutf_context *context) -{ - bool input_pending; - - spin_lock(&kutf_input_lock); - - input_pending = !list_empty(&context->userdata.input_head); - - spin_unlock(&kutf_input_lock); - - return input_pending; -} - -char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) -{ - struct kutf_userdata_line *line; - - spin_lock(&kutf_input_lock); - - while (list_empty(&context->userdata.input_head)) { - int err; - - kutf_set_waiting_for_input(context->result_set); - - spin_unlock(&kutf_input_lock); - - err = wait_event_interruptible(context->userdata.input_waitq, - pending_input(context)); - - if (err) - return ERR_PTR(-EINTR); - - spin_lock(&kutf_input_lock); - } - - line = list_first_entry(&context->userdata.input_head, - struct kutf_userdata_line, node); - if (line->str) { - /* - * Unless it is the end-of-input marker, - * remove it from the list - */ - list_del(&line->node); - } - - spin_unlock(&kutf_input_lock); - - if (str_size) - *str_size = line->size; - return line->str; -} - -int kutf_helper_input_enqueue(struct kutf_context *context, - const char __user *str, size_t size) -{ - struct kutf_userdata_line *line; - - line = kutf_mempool_alloc(&context->fixture_pool, - sizeof(*line) + size + 1); - if (!line) - return -ENOMEM; - if (str) { - unsigned long bytes_not_copied; - - line->size = size; - line->str = (void *)(line + 1); - bytes_not_copied = copy_from_user(line->str, str, size); - if (bytes_not_copied != 0) - return -EFAULT; - /* Zero terminate the string */ - line->str[size] = '\0'; - } else { - /* This is used to mark the end of input */ - WARN_ON(size); - line->size = 0; - line->str = NULL; - } - - spin_lock(&kutf_input_lock); - - list_add_tail(&line->node, &context->userdata.input_head); - - kutf_clear_waiting_for_input(context->result_set); - - spin_unlock(&kutf_input_lock); - - wake_up(&context->userdata.input_waitq); - - return 0; -} - -void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) -{ - kutf_helper_input_enqueue(context, NULL, 0); -} diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_helpers_user.c b/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_helpers_user.c deleted file mode 100755 index 108fa82d9b21..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_helpers_user.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
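
kutf_helper_input_dequeue() above blocks until user space writes a line into the test's "run" file, returns an ERR_PTR if the wait is interrupted, and returns NULL once the end-of-input marker has been queued. A minimal sketch of a test consuming one raw line; the messages are illustrative and the <kutf/kutf_helpers.h> include path is assumed:

/* Illustrative consumer of raw user data; assumes <linux/err.h> and
 * <kutf/kutf_helpers.h> on the include path. */
static void example_userdata_line_test(struct kutf_context *context)
{
        size_t len = 0;
        char *line;

        /* Blocks until user space writes to the "run" file */
        line = kutf_helper_input_dequeue(context, &len);
        if (IS_ERR(line)) {
                kutf_test_fail(context, "interrupted while waiting for input");
                return;
        }
        if (!line) {
                kutf_test_skip_msg(context, "no user data supplied");
                return;
        }

        kutf_test_pass(context, kutf_dsprintf(&context->fixture_pool,
                                              "received %zu bytes", len));
}
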
- * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* Kernel UTF test helpers that mirror those for kutf-userside */ -#include -#include -#include - -#include -#include -#include - -const char *valtype_names[] = { - "INVALID", - "U64", - "STR", -}; - -static const char *get_val_type_name(enum kutf_helper_valtype valtype) -{ - /* enums can be signed or unsigned (implementation dependant), so - * enforce it to prevent: - * a) "<0 comparison on unsigned type" warning - if we did both upper - * and lower bound check - * b) incorrect range checking if it was a signed type - if we did - * upper bound check only */ - unsigned int type_idx = (unsigned int)valtype; - - if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) - type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID; - - return valtype_names[type_idx]; -} - -/* Check up to str_len chars of val_str to see if it's a valid value name: - * - * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator - * - And, each char is in the character set [A-Z0-9_] */ -static int validate_val_name(const char *val_str, int str_len) -{ - int i = 0; - - for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) { - char val_chr = val_str[i]; - - if (val_chr >= 'A' && val_chr <= 'Z') - continue; - if (val_chr >= '0' && val_chr <= '9') - continue; - if (val_chr == '_') - continue; - - /* Character not in the set [A-Z0-9_] - report error */ - return 1; - } - - /* Names of 0 length are not valid */ - if (i == 0) - return 1; - /* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */ - if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0')) - return 1; - - return 0; -} - -/* Find the length of the valid part of the string when it will be in quotes - * e.g. "str" - * - * That is, before any '\\', '\n' or '"' characters. 
This is so we don't have - * to escape the string */ -static int find_quoted_string_valid_len(const char *str) -{ - char *ptr; - const char *check_chars = "\\\n\""; - - ptr = strpbrk(str, check_chars); - if (ptr) - return (int)(ptr-str); - - return (int)strlen(str); -} - -static int kutf_helper_userdata_enqueue(struct kutf_context *context, - const char *str) -{ - char *str_copy; - size_t len; - int err; - - len = strlen(str)+1; - - str_copy = kutf_mempool_alloc(&context->fixture_pool, len); - if (!str_copy) - return -ENOMEM; - - strcpy(str_copy, str); - - err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy); - - return err; -} - -#define MAX_U64_HEX_LEN 16 -/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */ -#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1) - -int kutf_helper_send_named_u64(struct kutf_context *context, - const char *val_name, u64 val) -{ - int ret = 1; - char msgbuf[NAMED_U64_VAL_BUF_SZ]; - const char *errmsg = NULL; - - if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': Invalid value name", val_name); - goto out_err; - } - - ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val); - if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", - val_name, NAMED_U64_VAL_BUF_SZ, ret); - goto out_err; - } - - ret = kutf_helper_userdata_enqueue(context, msgbuf); - if (ret) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': send returned %d", - val_name, ret); - goto out_err; - } - - return ret; -out_err: - kutf_test_fail(context, errmsg); - return ret; -} -EXPORT_SYMBOL(kutf_helper_send_named_u64); - -#define NAMED_VALUE_SEP "=" -#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\"" -#define NAMED_STR_END_DELIM "\"" - -int kutf_helper_max_str_len_for_kern(const char *val_name, - int kern_buf_sz) -{ - const int val_name_len = strlen(val_name); - const int start_delim_len = strlen(NAMED_STR_START_DELIM); - const int end_delim_len = strlen(NAMED_STR_END_DELIM); - int max_msg_len = kern_buf_sz; - int max_str_len; - - max_str_len = max_msg_len - val_name_len - start_delim_len - - end_delim_len; - - return max_str_len; -} -EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern); - -int kutf_helper_send_named_str(struct kutf_context *context, - const char *val_name, - const char *val_str) -{ - int val_str_len; - int str_buf_sz; - char *str_buf = NULL; - int ret = 1; - char *copy_ptr; - int val_name_len; - int start_delim_len = strlen(NAMED_STR_START_DELIM); - int end_delim_len = strlen(NAMED_STR_END_DELIM); - const char *errmsg = NULL; - - if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send u64 value named '%s': Invalid value name", val_name); - goto out_err; - } - val_name_len = strlen(val_name); - - val_str_len = find_quoted_string_valid_len(val_str); - - /* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */ - str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; - - /* Using kmalloc() here instead of mempool since we know we need to free - * before we return */ - str_buf = kmalloc(str_buf_sz, GFP_KERNEL); - if (!str_buf) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send str value named '%s': 
kmalloc failed, str_buf_sz=%d", - val_name, str_buf_sz); - goto out_err; - } - copy_ptr = str_buf; - - /* Manually copy each string component instead of snprintf because - * val_str may need to end early, and less error path handling */ - - /* name */ - memcpy(copy_ptr, val_name, val_name_len); - copy_ptr += val_name_len; - - /* str start delimiter */ - memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len); - copy_ptr += start_delim_len; - - /* str value */ - memcpy(copy_ptr, val_str, val_str_len); - copy_ptr += val_str_len; - - /* str end delimiter */ - memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len); - copy_ptr += end_delim_len; - - /* Terminator */ - *copy_ptr = '\0'; - - ret = kutf_helper_userdata_enqueue(context, str_buf); - - if (ret) { - errmsg = kutf_dsprintf(&context->fixture_pool, - "Failed to send str value named '%s': send returned %d", - val_name, ret); - goto out_err; - } - - kfree(str_buf); - return ret; - -out_err: - kutf_test_fail(context, errmsg); - kfree(str_buf); - return ret; -} -EXPORT_SYMBOL(kutf_helper_send_named_str); - -int kutf_helper_receive_named_val( - struct kutf_context *context, - struct kutf_helper_named_val *named_val) -{ - size_t recv_sz; - char *recv_str; - char *search_ptr; - char *name_str = NULL; - int name_len; - int strval_len; - enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID; - char *strval = NULL; - u64 u64val = 0; - int err = KUTF_HELPER_ERR_INVALID_VALUE; - - recv_str = kutf_helper_input_dequeue(context, &recv_sz); - if (!recv_str) - return -EBUSY; - else if (IS_ERR(recv_str)) - return PTR_ERR(recv_str); - - /* Find the '=', grab the name and validate it */ - search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]); - if (search_ptr) { - name_len = search_ptr - recv_str; - if (!validate_val_name(recv_str, name_len)) { - /* no need to reallocate - just modify string in place */ - name_str = recv_str; - name_str[name_len] = '\0'; - - /* Move until after the '=' */ - recv_str += (name_len + 1); - recv_sz -= (name_len + 1); - } - } - if (!name_str) { - pr_err("Invalid name part for received string '%s'\n", - recv_str); - return KUTF_HELPER_ERR_INVALID_NAME; - } - - /* detect value type */ - if (*recv_str == NAMED_STR_START_DELIM[1]) { - /* string delimiter start*/ - ++recv_str; - --recv_sz; - - /* Find end of string */ - search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]); - if (search_ptr) { - strval_len = search_ptr - recv_str; - /* Validate the string to ensure it contains no quotes */ - if (strval_len == find_quoted_string_valid_len(recv_str)) { - /* no need to reallocate - just modify string in place */ - strval = recv_str; - strval[strval_len] = '\0'; - - /* Move until after the end delimiter */ - recv_str += (strval_len + 1); - recv_sz -= (strval_len + 1); - type = KUTF_HELPER_VALTYPE_STR; - } else { - pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str); - err = KUTF_HELPER_ERR_CHARS_AFTER_VAL; - } - } else { - pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str); - err = KUTF_HELPER_ERR_NO_END_DELIMITER; - } - } else { - /* possibly a number value - strtoull will parse it */ - err = kstrtoull(recv_str, 0, &u64val); - /* unlike userspace can't get an end ptr, but if kstrtoull() - * reads characters after the number it'll report -EINVAL */ - if (!err) { - int len_remain = strnlen(recv_str, recv_sz); - - type = KUTF_HELPER_VALTYPE_U64; - recv_str += len_remain; - recv_sz -= len_remain; - } else { - /* special case: not a number, 
report as such */ - pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str); - } - } - - if (type == KUTF_HELPER_VALTYPE_INVALID) - return err; - - /* Any remaining characters - error */ - if (strnlen(recv_str, recv_sz) != 0) { - pr_err("Characters remain after value of type %s: '%s'\n", - get_val_type_name(type), recv_str); - return KUTF_HELPER_ERR_CHARS_AFTER_VAL; - } - - /* Success - write into the output structure */ - switch (type) { - case KUTF_HELPER_VALTYPE_U64: - named_val->u.val_u64 = u64val; - break; - case KUTF_HELPER_VALTYPE_STR: - named_val->u.val_str = strval; - break; - default: - pr_err("Unreachable, fix kutf_helper_receive_named_val\n"); - /* Coding error, report as though 'run' file failed */ - return -EINVAL; - } - - named_val->val_name = name_str; - named_val->type = type; - - return KUTF_HELPER_ERR_NONE; -} -EXPORT_SYMBOL(kutf_helper_receive_named_val); - -#define DUMMY_MSG "" -int kutf_helper_receive_check_val( - struct kutf_helper_named_val *named_val, - struct kutf_context *context, - const char *expect_val_name, - enum kutf_helper_valtype expect_val_type) -{ - int err; - - err = kutf_helper_receive_named_val(context, named_val); - if (err < 0) { - const char *msg = kutf_dsprintf(&context->fixture_pool, - "Failed to receive value named '%s'", - expect_val_name); - kutf_test_fail(context, msg); - return err; - } else if (err > 0) { - const char *msg = kutf_dsprintf(&context->fixture_pool, - "Named-value parse error when expecting value named '%s'", - expect_val_name); - kutf_test_fail(context, msg); - goto out_fail_and_fixup; - } - - if (strcmp(named_val->val_name, expect_val_name) != 0) { - const char *msg = kutf_dsprintf(&context->fixture_pool, - "Expecting to receive value named '%s' but got '%s'", - expect_val_name, named_val->val_name); - kutf_test_fail(context, msg); - goto out_fail_and_fixup; - } - - - if (named_val->type != expect_val_type) { - const char *msg = kutf_dsprintf(&context->fixture_pool, - "Expecting value named '%s' to be of type %s but got %s", - expect_val_name, get_val_type_name(expect_val_type), - get_val_type_name(named_val->type)); - kutf_test_fail(context, msg); - goto out_fail_and_fixup; - } - - return err; - -out_fail_and_fixup: - /* Produce a valid but incorrect value */ - switch (expect_val_type) { - case KUTF_HELPER_VALTYPE_U64: - named_val->u.val_u64 = 0ull; - break; - case KUTF_HELPER_VALTYPE_STR: - { - char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); - - if (!str) - return -1; - - strcpy(str, DUMMY_MSG); - named_val->u.val_str = str; - break; - } - default: - break; - } - - /* Indicate that this is invalid */ - named_val->type = KUTF_HELPER_VALTYPE_INVALID; - - /* But at least allow the caller to continue in the test with failures */ - return 0; -} -EXPORT_SYMBOL(kutf_helper_receive_check_val); - -void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val) -{ - switch (named_val->type) { - case KUTF_HELPER_VALTYPE_U64: - pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64); - break; - case KUTF_HELPER_VALTYPE_STR: - pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str); - break; - case KUTF_HELPER_VALTYPE_INVALID: - pr_warn("%s is invalid\n", named_val->val_name); - break; - default: - pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type); - break; - } -} -EXPORT_SYMBOL(kutf_helper_output_named_val); diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_mem.c 
b/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_mem.c deleted file mode 100755 index fd98beaeb84a..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_mem.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* Kernel UTF memory management functions */ - -#include -#include -#include - -#include - - -/** - * struct kutf_alloc_entry - Structure representing an allocation. - * @node: List node for use with kutf_mempool. - * @data: Data area of the allocation - */ -struct kutf_alloc_entry { - struct list_head node; - u8 data[0]; -}; - -int kutf_mempool_init(struct kutf_mempool *pool) -{ - if (!pool) { - pr_err("NULL pointer passed to %s\n", __func__); - return -1; - } - - INIT_LIST_HEAD(&pool->head); - mutex_init(&pool->lock); - - return 0; -} -EXPORT_SYMBOL(kutf_mempool_init); - -void kutf_mempool_destroy(struct kutf_mempool *pool) -{ - struct list_head *remove; - struct list_head *tmp; - - if (!pool) { - pr_err("NULL pointer passed to %s\n", __func__); - return; - } - - mutex_lock(&pool->lock); - list_for_each_safe(remove, tmp, &pool->head) { - struct kutf_alloc_entry *remove_alloc; - - remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); - list_del(&remove_alloc->node); - kfree(remove_alloc); - } - mutex_unlock(&pool->lock); - -} -EXPORT_SYMBOL(kutf_mempool_destroy); - -void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) -{ - struct kutf_alloc_entry *ret; - - if (!pool) { - pr_err("NULL pointer passed to %s\n", __func__); - goto fail_pool; - } - - mutex_lock(&pool->lock); - - ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); - if (!ret) { - pr_err("Failed to allocate memory\n"); - goto fail_alloc; - } - - INIT_LIST_HEAD(&ret->node); - list_add(&ret->node, &pool->head); - - mutex_unlock(&pool->lock); - - return &ret->data[0]; - -fail_alloc: - mutex_unlock(&pool->lock); -fail_pool: - return NULL; -} -EXPORT_SYMBOL(kutf_mempool_alloc); diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_resultset.c b/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_resultset.c deleted file mode 100755 index 94ecfa4421e1..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_resultset.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
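
The pool implementation above is deliberately simple: each kutf_mempool_alloc() is an individually tracked kmalloc() that is only released when the whole pool is destroyed, which is what lets messages and fixture data live for the duration of a test run. A small illustrative sequence, not tied to any particular test; the include path is assumed:

/* Illustrative pool usage; assumes <kutf/kutf_mem.h> on the include path. */
static int example_mempool_usage(void)
{
        struct kutf_mempool pool;
        char *buf;

        if (kutf_mempool_init(&pool))
                return -ENOMEM;

        /* Freed together with the pool, not individually */
        buf = kutf_mempool_alloc(&pool, 32);
        if (buf)
                snprintf(buf, 32, "pooled string");

        kutf_mempool_destroy(&pool);    /* releases every allocation */
        return 0;
}
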
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* Kernel UTF result management functions */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* Lock to protect all result structures */ -static DEFINE_SPINLOCK(kutf_result_lock); - -struct kutf_result_set *kutf_create_result_set(void) -{ - struct kutf_result_set *set; - - set = kmalloc(sizeof(*set), GFP_KERNEL); - if (!set) { - pr_err("Failed to allocate resultset"); - goto fail_alloc; - } - - INIT_LIST_HEAD(&set->results); - init_waitqueue_head(&set->waitq); - set->flags = 0; - - return set; - -fail_alloc: - return NULL; -} - -int kutf_add_result(struct kutf_context *context, - enum kutf_result_status status, - const char *message) -{ - struct kutf_mempool *mempool = &context->fixture_pool; - struct kutf_result_set *set = context->result_set; - /* Create the new result */ - struct kutf_result *new_result; - - BUG_ON(set == NULL); - - new_result = kutf_mempool_alloc(mempool, sizeof(*new_result)); - if (!new_result) { - pr_err("Result allocation failed\n"); - return -ENOMEM; - } - - INIT_LIST_HEAD(&new_result->node); - new_result->status = status; - new_result->message = message; - - spin_lock(&kutf_result_lock); - - list_add_tail(&new_result->node, &set->results); - - spin_unlock(&kutf_result_lock); - - wake_up(&set->waitq); - - return 0; -} - -void kutf_destroy_result_set(struct kutf_result_set *set) -{ - if (!list_empty(&set->results)) - pr_err("kutf_destroy_result_set: Unread results from test\n"); - - kfree(set); -} - -static bool kutf_has_result(struct kutf_result_set *set) -{ - bool has_result; - - spin_lock(&kutf_result_lock); - if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) - /* Pretend there are results if waiting for input */ - has_result = true; - else - has_result = !list_empty(&set->results); - spin_unlock(&kutf_result_lock); - - return has_result; -} - -struct kutf_result *kutf_remove_result(struct kutf_result_set *set) -{ - struct kutf_result *result = NULL; - int ret; - - do { - ret = wait_event_interruptible(set->waitq, - kutf_has_result(set)); - - if (ret) - return ERR_PTR(ret); - - spin_lock(&kutf_result_lock); - - if (!list_empty(&set->results)) { - result = list_first_entry(&set->results, - struct kutf_result, - node); - list_del(&result->node); - } else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) { - /* Return a fake result */ - static struct kutf_result waiting = { - .status = KUTF_RESULT_USERDATA_WAIT - }; - result = &waiting; - } - /* If result == NULL then there was a race with the event - * being removed between the check in kutf_has_result and - * the lock being obtained. 
In this case we retry - */ - - spin_unlock(&kutf_result_lock); - } while (result == NULL); - - return result; -} - -void kutf_set_waiting_for_input(struct kutf_result_set *set) -{ - spin_lock(&kutf_result_lock); - set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT; - spin_unlock(&kutf_result_lock); - - wake_up(&set->waitq); -} - -void kutf_clear_waiting_for_input(struct kutf_result_set *set) -{ - spin_lock(&kutf_result_lock); - set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT; - spin_unlock(&kutf_result_lock); -} diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_suite.c b/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_suite.c deleted file mode 100755 index f3a8e9b23f4c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_suite.c +++ /dev/null @@ -1,1203 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* Kernel UTF suite, test and fixture management including user to kernel - * interaction */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#if defined(CONFIG_DEBUG_FS) - -/** - * struct kutf_application - Structure which represents kutf application - * @name: The name of this test application. 
- * @dir: The debugfs directory for this test - * @suite_list: List head to store all the suites which are part of this - * application - */ -struct kutf_application { - const char *name; - struct dentry *dir; - struct list_head suite_list; -}; - -/** - * struct kutf_test_function - Structure which represents kutf test function - * @suite: Back reference to the suite this test function - * belongs to - * @filters: Filters that apply to this test function - * @test_id: Test ID - * @execute: Function to run for this test - * @test_data: Static data for this test - * @node: List node for test_list - * @variant_list: List head to store all the variants which can run on - * this function - * @dir: debugfs directory for this test function - */ -struct kutf_test_function { - struct kutf_suite *suite; - unsigned int filters; - unsigned int test_id; - void (*execute)(struct kutf_context *context); - union kutf_callback_data test_data; - struct list_head node; - struct list_head variant_list; - struct dentry *dir; -}; - -/** - * struct kutf_test_fixture - Structure which holds information on the kutf - * test fixture - * @test_func: Test function this fixture belongs to - * @fixture_index: Index of this fixture - * @node: List node for variant_list - * @dir: debugfs directory for this test fixture - */ -struct kutf_test_fixture { - struct kutf_test_function *test_func; - unsigned int fixture_index; - struct list_head node; - struct dentry *dir; -}; - -static struct dentry *base_dir; -static struct workqueue_struct *kutf_workq; - -/** - * struct kutf_convert_table - Structure which keeps test results - * @result_name: Status of the test result - * @result: Status value for a single test - */ -struct kutf_convert_table { - char result_name[50]; - enum kutf_result_status result; -}; - -struct kutf_convert_table kutf_convert[] = { -#define ADD_UTF_RESULT(_name) \ -{ \ - #_name, \ - _name, \ -}, -ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK) -ADD_UTF_RESULT(KUTF_RESULT_SKIP) -ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN) -ADD_UTF_RESULT(KUTF_RESULT_PASS) -ADD_UTF_RESULT(KUTF_RESULT_DEBUG) -ADD_UTF_RESULT(KUTF_RESULT_INFO) -ADD_UTF_RESULT(KUTF_RESULT_WARN) -ADD_UTF_RESULT(KUTF_RESULT_FAIL) -ADD_UTF_RESULT(KUTF_RESULT_FATAL) -ADD_UTF_RESULT(KUTF_RESULT_ABORT) -}; - -#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) - -/** - * kutf_create_context() - Create a test context in which a specific fixture - * of an application will be run and its results - * reported back to the user - * @test_fix: Test fixture to be run. - * - * The context's refcount will be initialized to 1. 
- * - * Return: Returns the created test context on success or NULL on failure - */ -static struct kutf_context *kutf_create_context( - struct kutf_test_fixture *test_fix); - -/** - * kutf_destroy_context() - Destroy a previously created test context, only - * once its refcount has become zero - * @kref: pointer to kref member within the context - * - * This should only be used via a kref_put() call on the context's kref member - */ -static void kutf_destroy_context(struct kref *kref); - -/** - * kutf_context_get() - increment refcount on a context - * @context: the kutf context - * - * This must be used when the lifetime of the context might exceed that of the - * thread creating @context - */ -static void kutf_context_get(struct kutf_context *context); - -/** - * kutf_context_put() - decrement refcount on a context, destroying it when it - * reached zero - * @context: the kutf context - * - * This must be used only after a corresponding kutf_context_get() call on - * @context, and the caller no longer needs access to @context. - */ -static void kutf_context_put(struct kutf_context *context); - -/** - * kutf_set_result() - Set the test result against the specified test context - * @context: Test context - * @status: Result status - */ -static void kutf_set_result(struct kutf_context *context, - enum kutf_result_status status); - -/** - * kutf_set_expected_result() - Set the expected test result for the specified - * test context - * @context: Test context - * @expected_status: Expected result status - */ -static void kutf_set_expected_result(struct kutf_context *context, - enum kutf_result_status expected_status); - -/** - * kutf_result_to_string() - Converts a KUTF result into a string - * @result_str: Output result string - * @result: Result status to convert - * - * Return: 1 if test result was successfully converted to string, 0 otherwise - */ -static int kutf_result_to_string(char **result_str, - enum kutf_result_status result) -{ - int i; - int ret = 0; - - for (i = 0; i < UTF_CONVERT_SIZE; i++) { - if (result == kutf_convert[i].result) { - *result_str = kutf_convert[i].result_name; - ret = 1; - } - } - return ret; -} - -/** - * kutf_debugfs_const_string_read() - Simple debugfs read callback which - * returns a constant string - * @file: Opened file to read from - * @buf: User buffer to write the data into - * @len: Amount of data to read - * @ppos: Offset into file to read from - * - * Return: On success, the number of bytes read and offset @ppos advanced by - * this number; on error, negative value - */ -static ssize_t kutf_debugfs_const_string_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) -{ - char *str = file->private_data; - - return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); -} - -static const struct file_operations kutf_debugfs_const_string_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = kutf_debugfs_const_string_read, - .llseek = default_llseek, -}; - -/** - * kutf_add_explicit_result() - Check if an explicit result needs to be added - * @context: KUTF test context - */ -static void kutf_add_explicit_result(struct kutf_context *context) -{ - switch (context->expected_status) { - case KUTF_RESULT_UNKNOWN: - break; - - case KUTF_RESULT_WARN: - if (context->status == KUTF_RESULT_WARN) - kutf_test_pass(context, - "Pass (expected warn occurred)"); - else if (context->status != KUTF_RESULT_SKIP) - kutf_test_fail(context, - "Fail (expected warn missing)"); - break; - - case KUTF_RESULT_FAIL: - if (context->status == 
KUTF_RESULT_FAIL) - kutf_test_pass(context, - "Pass (expected fail occurred)"); - else if (context->status != KUTF_RESULT_SKIP) { - /* Force the expected status so the fail gets logged */ - context->expected_status = KUTF_RESULT_PASS; - kutf_test_fail(context, - "Fail (expected fail missing)"); - } - break; - - case KUTF_RESULT_FATAL: - if (context->status == KUTF_RESULT_FATAL) - kutf_test_pass(context, - "Pass (expected fatal occurred)"); - else if (context->status != KUTF_RESULT_SKIP) - kutf_test_fail(context, - "Fail (expected fatal missing)"); - break; - - case KUTF_RESULT_ABORT: - if (context->status == KUTF_RESULT_ABORT) - kutf_test_pass(context, - "Pass (expected abort occurred)"); - else if (context->status != KUTF_RESULT_SKIP) - kutf_test_fail(context, - "Fail (expected abort missing)"); - break; - default: - break; - } -} - -static void kutf_run_test(struct work_struct *data) -{ - struct kutf_context *test_context = container_of(data, - struct kutf_context, work); - struct kutf_suite *suite = test_context->suite; - struct kutf_test_function *test_func; - - test_func = test_context->test_fix->test_func; - - /* - * Call the create fixture function if required before the - * fixture is run - */ - if (suite->create_fixture) - test_context->fixture = suite->create_fixture(test_context); - - /* Only run the test if the fixture was created (if required) */ - if ((suite->create_fixture && test_context->fixture) || - (!suite->create_fixture)) { - /* Run this fixture */ - test_func->execute(test_context); - - if (suite->remove_fixture) - suite->remove_fixture(test_context); - - kutf_add_explicit_result(test_context); - } - - kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL); - - kutf_context_put(test_context); -} - -/** - * kutf_debugfs_run_open() Debugfs open callback for the "run" entry. - * @inode: inode of the opened file - * @file: Opened file to read from - * - * This function creates a KUTF context and queues it onto a workqueue to be - * run asynchronously. The resulting file descriptor can be used to communicate - * userdata to the test and to read back the results of the test execution. - * - * Return: 0 on success - */ -static int kutf_debugfs_run_open(struct inode *inode, struct file *file) -{ - struct kutf_test_fixture *test_fix = inode->i_private; - struct kutf_context *test_context; - int err = 0; - - test_context = kutf_create_context(test_fix); - if (!test_context) { - err = -ENOMEM; - goto finish; - } - - file->private_data = test_context; - - /* This reference is release by the kutf_run_test */ - kutf_context_get(test_context); - - queue_work(kutf_workq, &test_context->work); - -finish: - return err; -} - -#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n" - -/** - * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. - * @file: Opened file to read from - * @buf: User buffer to write the data into - * @len: Amount of data to read - * @ppos: Offset into file to read from - * - * This function emits the results of the test, blocking until they are - * available. - * - * If the test involves user data then this will also return user data records - * to user space. If the test is waiting for user data then this function will - * output a message (to make the likes of 'cat' display it), followed by - * returning 0 to mark the end of file. - * - * Results will be emitted one at a time, once all the results have been read - * 0 will be returned to indicate there is no more data. - * - * Return: Number of bytes read. 
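
kutf_add_explicit_result() above is what makes the kutf_test_expect_*() declarations from kutf_suite.h useful: a test flags the outcome it anticipates, and a matching outcome is converted into an overall pass (while a missing one is logged as a failure). A sketch of a negative test using that mechanism; the exercised condition is illustrative:

/* Illustrative negative test: the failure below is the expected outcome,
 * so kutf_add_explicit_result() records an overall pass. */
static void example_expected_failure_test(struct kutf_context *context)
{
        kutf_test_expect_fail(context);

        /* Exercise something that should be rejected... */
        kutf_test_fail(context, "rejected as expected");
}
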
- */ -static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - struct kutf_context *test_context = file->private_data; - struct kutf_result *res; - unsigned long bytes_not_copied; - ssize_t bytes_copied = 0; - char *kutf_str_ptr = NULL; - size_t kutf_str_len = 0; - size_t message_len = 0; - char separator = ':'; - char terminator = '\n'; - - res = kutf_remove_result(test_context->result_set); - - if (IS_ERR(res)) - return PTR_ERR(res); - - /* - * Handle 'fake' results - these results are converted to another - * form before being returned from the kernel - */ - switch (res->status) { - case KUTF_RESULT_TEST_FINISHED: - return 0; - case KUTF_RESULT_USERDATA_WAIT: - if (test_context->userdata.flags & - KUTF_USERDATA_WARNING_OUTPUT) { - /* - * Warning message already output, - * signal end-of-file - */ - return 0; - } - - message_len = sizeof(USERDATA_WARNING_MESSAGE)-1; - if (message_len > len) - message_len = len; - - bytes_not_copied = copy_to_user(buf, - USERDATA_WARNING_MESSAGE, - message_len); - if (bytes_not_copied != 0) - return -EFAULT; - test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT; - return message_len; - case KUTF_RESULT_USERDATA: - message_len = strlen(res->message); - if (message_len > len-1) { - message_len = len-1; - pr_warn("User data truncated, read not long enough\n"); - } - bytes_not_copied = copy_to_user(buf, res->message, - message_len); - if (bytes_not_copied != 0) { - pr_warn("Failed to copy data to user space buffer\n"); - return -EFAULT; - } - /* Finally the terminator */ - bytes_not_copied = copy_to_user(&buf[message_len], - &terminator, 1); - if (bytes_not_copied != 0) { - pr_warn("Failed to copy data to user space buffer\n"); - return -EFAULT; - } - return message_len+1; - default: - /* Fall through - this is a test result */ - break; - } - - /* Note: This code assumes a result is read completely */ - kutf_result_to_string(&kutf_str_ptr, res->status); - if (kutf_str_ptr) - kutf_str_len = strlen(kutf_str_ptr); - - if (res->message) - message_len = strlen(res->message); - - if ((kutf_str_len + 1 + message_len + 1) > len) { - pr_err("Not enough space in user buffer for a single result"); - return 0; - } - - /* First copy the result string */ - if (kutf_str_ptr) { - bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, - kutf_str_len); - bytes_copied += kutf_str_len - bytes_not_copied; - if (bytes_not_copied) - goto exit; - } - - /* Then the separator */ - bytes_not_copied = copy_to_user(&buf[bytes_copied], - &separator, 1); - bytes_copied += 1 - bytes_not_copied; - if (bytes_not_copied) - goto exit; - - /* Finally Next copy the result string */ - if (res->message) { - bytes_not_copied = copy_to_user(&buf[bytes_copied], - res->message, message_len); - bytes_copied += message_len - bytes_not_copied; - if (bytes_not_copied) - goto exit; - } - - /* Finally the terminator */ - bytes_not_copied = copy_to_user(&buf[bytes_copied], - &terminator, 1); - bytes_copied += 1 - bytes_not_copied; - -exit: - return bytes_copied; -} - -/** - * kutf_debugfs_run_write() Debugfs write callback for the "run" entry. - * @file: Opened file to write to - * @buf: User buffer to read the data from - * @len: Amount of data to write - * @ppos: Offset into file to write to - * - * This function allows user and kernel to exchange extra data necessary for - * the test fixture. 
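
On the kernel side that exchange is normally expressed through the named-value helpers shown earlier (kutf_helper_send_named_u64() and kutf_helper_receive_check_val()) rather than raw reads and writes of the "run" file. A sketch of a test publishing a value and waiting for user space to echo one back; the names "GPU_ADDR" and "USER_REPLY" are illustrative and the <kutf/kutf_helpers_user.h> include path is assumed:

/* Illustrative named-value exchange; assumes <kutf/kutf_helpers_user.h>. */
static void example_userdata_exchange_test(struct kutf_context *context)
{
        struct kutf_helper_named_val reply;

        /* Appears to user space in the "run" file as GPU_ADDR=0x1000 */
        if (kutf_helper_send_named_u64(context, "GPU_ADDR", 0x1000))
                return; /* helper has already logged a failure */

        /* Blocks until user space writes USER_REPLY=<u64> back */
        if (kutf_helper_receive_check_val(&reply, context, "USER_REPLY",
                                          KUTF_HELPER_VALTYPE_U64))
                return;

        if (reply.type == KUTF_HELPER_VALTYPE_U64 && reply.u.val_u64 != 0)
                kutf_test_pass(context, "user space replied with a non-zero value");
        else
                kutf_test_fail(context, "unexpected reply from user space");
}
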
- * - * The data is added to the first struct kutf_context running the fixture - * - * Return: Number of bytes written - */ -static ssize_t kutf_debugfs_run_write(struct file *file, - const char __user *buf, size_t len, loff_t *ppos) -{ - int ret = 0; - struct kutf_context *test_context = file->private_data; - - if (len > KUTF_MAX_LINE_LENGTH) - return -EINVAL; - - ret = kutf_helper_input_enqueue(test_context, buf, len); - if (ret < 0) - return ret; - - return len; -} - -/** - * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry. - * @inode: File entry representation - * @file: A specific opening of the file - * - * Release any resources that were created during the opening of the file - * - * Note that resources may not be released immediately, that might only happen - * later when other users of the kutf_context release their refcount. - * - * Return: 0 on success - */ -static int kutf_debugfs_run_release(struct inode *inode, struct file *file) -{ - struct kutf_context *test_context = file->private_data; - - kutf_helper_input_enqueue_end_of_data(test_context); - - kutf_context_put(test_context); - return 0; -} - -static const struct file_operations kutf_debugfs_run_ops = { - .owner = THIS_MODULE, - .open = kutf_debugfs_run_open, - .read = kutf_debugfs_run_read, - .write = kutf_debugfs_run_write, - .release = kutf_debugfs_run_release, - .llseek = default_llseek, -}; - -/** - * create_fixture_variant() - Creates a fixture variant for the specified - * test function and index and the debugfs entries - * that represent it. - * @test_func: Test function - * @fixture_index: Fixture index - * - * Return: 0 on success, negative value corresponding to error code in failure - */ -static int create_fixture_variant(struct kutf_test_function *test_func, - unsigned int fixture_index) -{ - struct kutf_test_fixture *test_fix; - char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ - struct dentry *tmp; - int err; - - test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL); - if (!test_fix) { - pr_err("Failed to create debugfs directory when adding fixture\n"); - err = -ENOMEM; - goto fail_alloc; - } - - test_fix->test_func = test_func; - test_fix->fixture_index = fixture_index; - - snprintf(name, sizeof(name), "%d", fixture_index); - test_fix->dir = debugfs_create_dir(name, test_func->dir); - if (!test_func->dir) { - pr_err("Failed to create debugfs directory when adding fixture\n"); - /* Might not be the right error, we don't get it passed back to us */ - err = -EEXIST; - goto fail_dir; - } - - tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n", - &kutf_debugfs_const_string_ops); - if (!tmp) { - pr_err("Failed to create debugfs file \"type\" when adding fixture\n"); - /* Might not be the right error, we don't get it passed back to us */ - err = -EEXIST; - goto fail_file; - } - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) - tmp = debugfs_create_file_unsafe( -#else - tmp = debugfs_create_file( -#endif - "run", 0600, test_fix->dir, - test_fix, - &kutf_debugfs_run_ops); - if (!tmp) { - pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); - /* Might not be the right error, we don't get it passed back to us */ - err = -EEXIST; - goto fail_file; - } - - list_add(&test_fix->node, &test_func->variant_list); - return 0; - -fail_file: - debugfs_remove_recursive(test_fix->dir); -fail_dir: - kfree(test_fix); -fail_alloc: - return err; -} - -/** - * kutf_remove_test_variant() - Destroy a previously created fixture variant. 
- * @test_fix: Test fixture - */ -static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) -{ - debugfs_remove_recursive(test_fix->dir); - kfree(test_fix); -} - -void kutf_add_test_with_filters_and_data( - struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data test_data) -{ - struct kutf_test_function *test_func; - struct dentry *tmp; - unsigned int i; - - test_func = kmalloc(sizeof(*test_func), GFP_KERNEL); - if (!test_func) { - pr_err("Failed to allocate memory when adding test %s\n", name); - goto fail_alloc; - } - - INIT_LIST_HEAD(&test_func->variant_list); - - test_func->dir = debugfs_create_dir(name, suite->dir); - if (!test_func->dir) { - pr_err("Failed to create debugfs directory when adding test %s\n", name); - goto fail_dir; - } - - tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n", - &kutf_debugfs_const_string_ops); - if (!tmp) { - pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); - goto fail_file; - } - - test_func->filters = filters; - tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir, - &test_func->filters); - if (!tmp) { - pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); - goto fail_file; - } - - test_func->test_id = id; - tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir, - &test_func->test_id); - if (!tmp) { - pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); - goto fail_file; - } - - for (i = 0; i < suite->fixture_variants; i++) { - if (create_fixture_variant(test_func, i)) { - pr_err("Failed to create fixture %d when adding test %s\n", i, name); - goto fail_file; - } - } - - test_func->suite = suite; - test_func->execute = execute; - test_func->test_data = test_data; - - list_add(&test_func->node, &suite->test_list); - return; - -fail_file: - debugfs_remove_recursive(test_func->dir); -fail_dir: - kfree(test_func); -fail_alloc: - return; -} -EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); - -void kutf_add_test_with_filters( - struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context), - unsigned int filters) -{ - union kutf_callback_data data; - - data.ptr_value = NULL; - - kutf_add_test_with_filters_and_data(suite, - id, - name, - execute, - suite->suite_default_flags, - data); -} -EXPORT_SYMBOL(kutf_add_test_with_filters); - -void kutf_add_test(struct kutf_suite *suite, - unsigned int id, - const char *name, - void (*execute)(struct kutf_context *context)) -{ - union kutf_callback_data data; - - data.ptr_value = NULL; - - kutf_add_test_with_filters_and_data(suite, - id, - name, - execute, - suite->suite_default_flags, - data); -} -EXPORT_SYMBOL(kutf_add_test); - -/** - * kutf_remove_test(): Remove a previously added test function. 
- * @test_func: Test function - */ -static void kutf_remove_test(struct kutf_test_function *test_func) -{ - struct list_head *pos; - struct list_head *tmp; - - list_for_each_safe(pos, tmp, &test_func->variant_list) { - struct kutf_test_fixture *test_fix; - - test_fix = list_entry(pos, struct kutf_test_fixture, node); - kutf_remove_test_variant(test_fix); - } - - list_del(&test_func->node); - debugfs_remove_recursive(test_func->dir); - kfree(test_func); -} - -struct kutf_suite *kutf_create_suite_with_filters_and_data( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters, - union kutf_callback_data suite_data) -{ - struct kutf_suite *suite; - struct dentry *tmp; - - suite = kmalloc(sizeof(*suite), GFP_KERNEL); - if (!suite) { - pr_err("Failed to allocate memory when creating suite %s\n", name); - goto fail_kmalloc; - } - - suite->dir = debugfs_create_dir(name, app->dir); - if (!suite->dir) { - pr_err("Failed to create debugfs directory when adding test %s\n", name); - goto fail_debugfs; - } - - tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n", - &kutf_debugfs_const_string_ops); - if (!tmp) { - pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); - goto fail_file; - } - - INIT_LIST_HEAD(&suite->test_list); - suite->app = app; - suite->name = name; - suite->fixture_variants = fixture_count; - suite->create_fixture = create_fixture; - suite->remove_fixture = remove_fixture; - suite->suite_default_flags = filters; - suite->suite_data = suite_data; - - list_add(&suite->node, &app->suite_list); - - return suite; - -fail_file: - debugfs_remove_recursive(suite->dir); -fail_debugfs: - kfree(suite); -fail_kmalloc: - return NULL; -} -EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); - -struct kutf_suite *kutf_create_suite_with_filters( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context), - unsigned int filters) -{ - union kutf_callback_data data; - - data.ptr_value = NULL; - return kutf_create_suite_with_filters_and_data(app, - name, - fixture_count, - create_fixture, - remove_fixture, - filters, - data); -} -EXPORT_SYMBOL(kutf_create_suite_with_filters); - -struct kutf_suite *kutf_create_suite( - struct kutf_application *app, - const char *name, - unsigned int fixture_count, - void *(*create_fixture)(struct kutf_context *context), - void (*remove_fixture)(struct kutf_context *context)) -{ - union kutf_callback_data data; - - data.ptr_value = NULL; - return kutf_create_suite_with_filters_and_data(app, - name, - fixture_count, - create_fixture, - remove_fixture, - KUTF_F_TEST_GENERIC, - data); -} -EXPORT_SYMBOL(kutf_create_suite); - -/** - * kutf_destroy_suite() - Destroy a previously added test suite. 
- * @suite: Test suite - */ -static void kutf_destroy_suite(struct kutf_suite *suite) -{ - struct list_head *pos; - struct list_head *tmp; - - list_for_each_safe(pos, tmp, &suite->test_list) { - struct kutf_test_function *test_func; - - test_func = list_entry(pos, struct kutf_test_function, node); - kutf_remove_test(test_func); - } - - list_del(&suite->node); - debugfs_remove_recursive(suite->dir); - kfree(suite); -} - -struct kutf_application *kutf_create_application(const char *name) -{ - struct kutf_application *app; - struct dentry *tmp; - - app = kmalloc(sizeof(*app), GFP_KERNEL); - if (!app) { - pr_err("Failed to create allocate memory when creating application %s\n", name); - goto fail_kmalloc; - } - - app->dir = debugfs_create_dir(name, base_dir); - if (!app->dir) { - pr_err("Failed to create debugfs direcotry when creating application %s\n", name); - goto fail_debugfs; - } - - tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n", - &kutf_debugfs_const_string_ops); - if (!tmp) { - pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); - goto fail_file; - } - - INIT_LIST_HEAD(&app->suite_list); - app->name = name; - - return app; - -fail_file: - debugfs_remove_recursive(app->dir); -fail_debugfs: - kfree(app); -fail_kmalloc: - return NULL; -} -EXPORT_SYMBOL(kutf_create_application); - -void kutf_destroy_application(struct kutf_application *app) -{ - struct list_head *pos; - struct list_head *tmp; - - list_for_each_safe(pos, tmp, &app->suite_list) { - struct kutf_suite *suite; - - suite = list_entry(pos, struct kutf_suite, node); - kutf_destroy_suite(suite); - } - - debugfs_remove_recursive(app->dir); - kfree(app); -} -EXPORT_SYMBOL(kutf_destroy_application); - -static struct kutf_context *kutf_create_context( - struct kutf_test_fixture *test_fix) -{ - struct kutf_context *new_context; - - new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); - if (!new_context) { - pr_err("Failed to allocate test context"); - goto fail_alloc; - } - - new_context->result_set = kutf_create_result_set(); - if (!new_context->result_set) { - pr_err("Failed to create result set"); - goto fail_result_set; - } - - new_context->test_fix = test_fix; - /* Save the pointer to the suite as the callbacks will require it */ - new_context->suite = test_fix->test_func->suite; - new_context->status = KUTF_RESULT_UNKNOWN; - new_context->expected_status = KUTF_RESULT_UNKNOWN; - - kutf_mempool_init(&new_context->fixture_pool); - new_context->fixture = NULL; - new_context->fixture_index = test_fix->fixture_index; - new_context->fixture_name = NULL; - new_context->test_data = test_fix->test_func->test_data; - - new_context->userdata.flags = 0; - INIT_LIST_HEAD(&new_context->userdata.input_head); - init_waitqueue_head(&new_context->userdata.input_waitq); - - INIT_WORK(&new_context->work, kutf_run_test); - - kref_init(&new_context->kref); - - return new_context; - -fail_result_set: - kfree(new_context); -fail_alloc: - return NULL; -} - -static void kutf_destroy_context(struct kref *kref) -{ - struct kutf_context *context; - - context = container_of(kref, struct kutf_context, kref); - kutf_destroy_result_set(context->result_set); - kutf_mempool_destroy(&context->fixture_pool); - kfree(context); -} - -static void kutf_context_get(struct kutf_context *context) -{ - kref_get(&context->kref); -} - -static void kutf_context_put(struct kutf_context *context) -{ - kref_put(&context->kref, kutf_destroy_context); -} - - -static void kutf_set_result(struct kutf_context *context, - enum 
kutf_result_status status) -{ - context->status = status; -} - -static void kutf_set_expected_result(struct kutf_context *context, - enum kutf_result_status expected_status) -{ - context->expected_status = expected_status; -} - -/** - * kutf_test_log_result() - Log a result for the specified test context - * @context: Test context - * @message: Result string - * @new_status: Result status - */ -static void kutf_test_log_result( - struct kutf_context *context, - const char *message, - enum kutf_result_status new_status) -{ - if (context->status < new_status) - context->status = new_status; - - if (context->expected_status != new_status) - kutf_add_result(context, new_status, message); -} - -void kutf_test_log_result_external( - struct kutf_context *context, - const char *message, - enum kutf_result_status new_status) -{ - kutf_test_log_result(context, message, new_status); -} -EXPORT_SYMBOL(kutf_test_log_result_external); - -void kutf_test_expect_abort(struct kutf_context *context) -{ - kutf_set_expected_result(context, KUTF_RESULT_ABORT); -} -EXPORT_SYMBOL(kutf_test_expect_abort); - -void kutf_test_expect_fatal(struct kutf_context *context) -{ - kutf_set_expected_result(context, KUTF_RESULT_FATAL); -} -EXPORT_SYMBOL(kutf_test_expect_fatal); - -void kutf_test_expect_fail(struct kutf_context *context) -{ - kutf_set_expected_result(context, KUTF_RESULT_FAIL); -} -EXPORT_SYMBOL(kutf_test_expect_fail); - -void kutf_test_expect_warn(struct kutf_context *context) -{ - kutf_set_expected_result(context, KUTF_RESULT_WARN); -} -EXPORT_SYMBOL(kutf_test_expect_warn); - -void kutf_test_expect_pass(struct kutf_context *context) -{ - kutf_set_expected_result(context, KUTF_RESULT_PASS); -} -EXPORT_SYMBOL(kutf_test_expect_pass); - -void kutf_test_skip(struct kutf_context *context) -{ - kutf_set_result(context, KUTF_RESULT_SKIP); - kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); - - kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); -} -EXPORT_SYMBOL(kutf_test_skip); - -void kutf_test_skip_msg(struct kutf_context *context, const char *message) -{ - kutf_set_result(context, KUTF_RESULT_SKIP); - kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); - - kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, - "Test skipped: %s", message), KUTF_RESULT_SKIP); - kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); -} -EXPORT_SYMBOL(kutf_test_skip_msg); - -void kutf_test_debug(struct kutf_context *context, char const *message) -{ - kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); -} -EXPORT_SYMBOL(kutf_test_debug); - -void kutf_test_pass(struct kutf_context *context, char const *message) -{ - static const char explicit_message[] = "(explicit pass)"; - - if (!message) - message = explicit_message; - - kutf_test_log_result(context, message, KUTF_RESULT_PASS); -} -EXPORT_SYMBOL(kutf_test_pass); - -void kutf_test_info(struct kutf_context *context, char const *message) -{ - kutf_test_log_result(context, message, KUTF_RESULT_INFO); -} -EXPORT_SYMBOL(kutf_test_info); - -void kutf_test_warn(struct kutf_context *context, char const *message) -{ - kutf_test_log_result(context, message, KUTF_RESULT_WARN); -} -EXPORT_SYMBOL(kutf_test_warn); - -void kutf_test_fail(struct kutf_context *context, char const *message) -{ - kutf_test_log_result(context, message, KUTF_RESULT_FAIL); -} -EXPORT_SYMBOL(kutf_test_fail); - -void kutf_test_fatal(struct kutf_context *context, char const *message) -{ - kutf_test_log_result(context, message, KUTF_RESULT_FATAL); -} 
-EXPORT_SYMBOL(kutf_test_fatal); - -void kutf_test_abort(struct kutf_context *context) -{ - kutf_test_log_result(context, "", KUTF_RESULT_ABORT); -} -EXPORT_SYMBOL(kutf_test_abort); - -/** - * init_kutf_core() - Module entry point. - * - * Create the base entry point in debugfs. - */ -static int __init init_kutf_core(void) -{ - kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1); - if (!kutf_workq) - return -ENOMEM; - - base_dir = debugfs_create_dir("kutf_tests", NULL); - if (!base_dir) { - destroy_workqueue(kutf_workq); - kutf_workq = NULL; - return -ENOMEM; - } - - return 0; -} - -/** - * exit_kutf_core() - Module exit point. - * - * Remove the base entry point in debugfs. - */ -static void __exit exit_kutf_core(void) -{ - debugfs_remove_recursive(base_dir); - - if (kutf_workq) - destroy_workqueue(kutf_workq); -} - -#else /* defined(CONFIG_DEBUG_FS) */ - -/** - * init_kutf_core() - Module entry point. - * - * Stub for when build against a kernel without debugfs support - */ -static int __init init_kutf_core(void) -{ - pr_debug("KUTF requires a kernel with debug fs support"); - - return -ENODEV; -} - -/** - * exit_kutf_core() - Module exit point. - * - * Stub for when build against a kernel without debugfs support - */ -static void __exit exit_kutf_core(void) -{ -} -#endif /* defined(CONFIG_DEBUG_FS) */ - -MODULE_LICENSE("GPL"); - -module_init(init_kutf_core); -module_exit(exit_kutf_core); diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_utils.c b/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_utils.c deleted file mode 100755 index 7f5ac517fdb4..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/kutf_utils.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -/* Kernel UTF utility functions */ - -#include -#include -#include -#include - -#include -#include - -static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; - -DEFINE_MUTEX(buffer_lock); - -const char *kutf_dsprintf(struct kutf_mempool *pool, - const char *fmt, ...) 
-{ - va_list args; - int len; - int size; - void *buffer; - - mutex_lock(&buffer_lock); - va_start(args, fmt); - len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); - va_end(args); - - if (len < 0) { - pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt); - goto fail_format; - } - - if (len >= sizeof(tmp_buffer)) { - pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt); - size = sizeof(tmp_buffer); - } else { - size = len + 1; - } - - buffer = kutf_mempool_alloc(pool, size); - if (!buffer) - goto fail_alloc; - - memcpy(buffer, tmp_buffer, size); - mutex_unlock(&buffer_lock); - - return buffer; - -fail_alloc: -fail_format: - mutex_unlock(&buffer_lock); - return NULL; -} -EXPORT_SYMBOL(kutf_dsprintf); diff --git a/drivers/gpu/drm/bifrost/midgard/tests/kutf/sconscript b/drivers/gpu/drm/bifrost/midgard/tests/kutf/sconscript deleted file mode 100755 index 4590d1af34db..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/kutf/sconscript +++ /dev/null @@ -1,27 +0,0 @@ -# -# (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -Import('kutf_env') - -make_args = kutf_env.kernel_get_config_defines(ret_list = True, extra_cflags = ['-DCONFIG_MALI_KUTF'], extra_configs = ['CONFIG_MALI_KUTF=m']) - -mod = kutf_env.BuildKernelModule('$STATIC_LIB_PATH/kutf.ko', Glob('*.c'), make_args = make_args) -kutf_env.KernelObjTarget('kutf', mod) diff --git a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Kbuild deleted file mode 100755 index ca8c51273b4c..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Kbuild +++ /dev/null @@ -1,26 +0,0 @@ -# -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. 
-# -# SPDX-License-Identifier: GPL-2.0 -# -# - -ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android - -obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o - -mali_kutf_irq_test-y := mali_kutf_irq_test_main.o diff --git a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Kconfig b/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Kconfig deleted file mode 100755 index 4a3863afc9bf..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Kconfig +++ /dev/null @@ -1,29 +0,0 @@ -# -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -config MALI_IRQ_LATENCY - tristate "Mali GPU IRQ latency measurement" - depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF - default m - help - This option will build a test module mali_kutf_irq_test that - can determine the latency of the Mali GPU IRQ on your system. - Choosing M here will generate a single module called mali_kutf_irq_test. diff --git a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Makefile deleted file mode 100755 index 9218a40f8069..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -# -# (C) COPYRIGHT 2015, 2017-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. 
-# -# SPDX-License-Identifier: GPL-2.0 -# -# - -# linux build system bootstrap for out-of-tree module - -# default to building for the host -ARCH ?= $(shell uname -m) - -ifeq ($(KDIR),) -$(error Must specify KDIR to point to the kernel to target)) -endif - -TEST_CCFLAGS := \ - -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ - -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ - -DMALI_USE_CSF=$(MALI_USE_CSF) \ - $(SCONS_CFLAGS) \ - -I$(CURDIR)/../include \ - -I$(CURDIR)/../../../../../../include \ - -I$(CURDIR)/../../../ \ - -I$(CURDIR)/../../ \ - -I$(CURDIR)/../../backend/gpu \ - -I$(CURDIR)/ \ - -I$(srctree)/drivers/staging/android \ - -I$(srctree)/include/linux - -all: - $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules - -clean: - $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/build.bp deleted file mode 100755 index 66f4eb3c4e90..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/build.bp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright: - * ---------------------------------------------------------------------------- - * This confidential and proprietary software may be used only as authorized - * by a licensing agreement from ARM Limited. - * (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorized copies and - * copies may only be made to the extent permitted by a licensing agreement - * from ARM Limited. - * ---------------------------------------------------------------------------- - */ - -bob_kernel_module { - name: "mali_kutf_irq_test", - defaults: ["kernel_test_module_defaults"], - srcs: [ - "Kbuild", - "mali_kutf_irq_test_main.c", - ], - extra_symbols: [ - "mali_kbase", - "kutf", - ], - enabled: false, - base_build_kutf: { - enabled: true, - kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"], - }, -} diff --git a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c deleted file mode 100755 index 4181b7f92db6..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ /dev/null @@ -1,273 +0,0 @@ -/* - * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include -#include -#include - -#include "mali_kbase.h" -#include - -#include -#include - -/* - * This file contains the code which is used for measuring interrupt latency - * of the Mali GPU IRQ. 
In particular, function mali_kutf_irq_latency() is - * used with this purpose and it is called within KUTF framework - a kernel - * unit test framework. The measured latency provided by this test should - * be representative for the latency of the Mali JOB/MMU IRQs as well. - */ - -/* KUTF test application pointer for this test */ -struct kutf_application *irq_app; - -/** - * struct kutf_irq_fixture data - test fixture used by the test functions. - * @kbdev: kbase device for the GPU. - * - */ -struct kutf_irq_fixture_data { - struct kbase_device *kbdev; -}; - -#define SEC_TO_NANO(s) ((s)*1000000000LL) - -/* ID for the GPU IRQ */ -#define GPU_IRQ_HANDLER 2 - -#define NR_TEST_IRQS 1000000 - -/* IRQ for the test to trigger. Currently MULTIPLE_GPU_FAULTS as we would not - * expect to see this in normal use (e.g., when Android is running). */ -#define TEST_IRQ MULTIPLE_GPU_FAULTS - -#define IRQ_TIMEOUT HZ - -/* Kernel API for setting irq throttle hook callback and irq time in us*/ -extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, - irq_handler_t custom_handler, - int irq_type); -extern irqreturn_t kbase_gpu_irq_handler(int irq, void *data); - -static DECLARE_WAIT_QUEUE_HEAD(wait); -static bool triggered; -static u64 irq_time; - -static void *kbase_untag(void *ptr) -{ - return (void *)(((uintptr_t) ptr) & ~3); -} - -/** - * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler - * @irq: IRQ number - * @data: Data associated with this IRQ - * - * Return: state of the IRQ - */ -static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) -{ - struct kbase_device *kbdev = kbase_untag(data); - u32 val; - - val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); - if (val & TEST_IRQ) { - struct timespec tval; - - getnstimeofday(&tval); - irq_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); - - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); - - triggered = true; - wake_up(&wait); - - return IRQ_HANDLED; - } - - /* Trigger main irq handler */ - return kbase_gpu_irq_handler(irq, data); -} - -/** - * mali_kutf_irq_default_create_fixture() - Creates the fixture data required - * for all the tests in the irq suite. - * @context: KUTF context. - * - * Return: Fixture data created on success or NULL on failure - */ -static void *mali_kutf_irq_default_create_fixture( - struct kutf_context *context) -{ - struct kutf_irq_fixture_data *data; - - data = kutf_mempool_alloc(&context->fixture_pool, - sizeof(struct kutf_irq_fixture_data)); - - if (!data) - goto fail; - - /* Acquire the kbase device */ - data->kbdev = kbase_find_device(-1); - if (data->kbdev == NULL) { - kutf_test_fail(context, "Failed to find kbase device"); - goto fail; - } - - return data; - -fail: - return NULL; -} - -/** - * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously - * created by mali_kutf_irq_default_create_fixture. - * - * @context: KUTF context. - */ -static void mali_kutf_irq_default_remove_fixture( - struct kutf_context *context) -{ - struct kutf_irq_fixture_data *data = context->fixture; - struct kbase_device *kbdev = data->kbdev; - - kbase_release_device(kbdev); -} - -/** - * mali_kutf_irq_latency() - measure GPU IRQ latency - * @context: kutf context within which to perform the test - * - * The test triggers IRQs manually, and measures the - * time between triggering the IRQ and the IRQ handler being executed. 
- * - * This is not a traditional test, in that the pass/fail status has little - * meaning (other than indicating that the IRQ handler executed at all). Instead - * the results are in the latencies provided with the test result. There is no - * meaningful pass/fail result that can be obtained here, instead the latencies - * are provided for manual analysis only. - */ -static void mali_kutf_irq_latency(struct kutf_context *context) -{ - struct kutf_irq_fixture_data *data = context->fixture; - struct kbase_device *kbdev = data->kbdev; - u64 min_time = U64_MAX, max_time = 0, average_time = 0; - int i; - bool test_failed = false; - - /* Force GPU to be powered */ - kbase_pm_context_active(kbdev); - - kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, - GPU_IRQ_HANDLER); - - for (i = 0; i < NR_TEST_IRQS; i++) { - struct timespec tval; - u64 start_time; - int ret; - - triggered = false; - getnstimeofday(&tval); - start_time = SEC_TO_NANO(tval.tv_sec) + (tval.tv_nsec); - - /* Trigger fake IRQ */ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - TEST_IRQ); - - ret = wait_event_timeout(wait, triggered != false, IRQ_TIMEOUT); - - if (ret == 0) { - kutf_test_fail(context, "Timed out waiting for IRQ\n"); - test_failed = true; - break; - } - - if ((irq_time - start_time) < min_time) - min_time = irq_time - start_time; - if ((irq_time - start_time) > max_time) - max_time = irq_time - start_time; - average_time += irq_time - start_time; - - udelay(10); - } - - /* Go back to default handler */ - kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); - - kbase_pm_context_idle(kbdev); - - if (!test_failed) { - const char *results; - - do_div(average_time, NR_TEST_IRQS); - results = kutf_dsprintf(&context->fixture_pool, - "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", - min_time, max_time, average_time); - kutf_test_pass(context, results); - } -} - -/** - * Module entry point for this test. - */ -int mali_kutf_irq_test_main_init(void) -{ - struct kutf_suite *suite; - - irq_app = kutf_create_application("irq"); - - if (NULL == irq_app) { - pr_warn("Creation of test application failed!\n"); - return -ENOMEM; - } - - suite = kutf_create_suite(irq_app, "irq_default", - 1, mali_kutf_irq_default_create_fixture, - mali_kutf_irq_default_remove_fixture); - - if (NULL == suite) { - pr_warn("Creation of test suite failed!\n"); - kutf_destroy_application(irq_app); - return -ENOMEM; - } - - kutf_add_test(suite, 0x0, "irq_latency", - mali_kutf_irq_latency); - return 0; -} - -/** - * Module exit point for this test. - */ -void mali_kutf_irq_test_main_exit(void) -{ - kutf_destroy_application(irq_app); -} - -module_init(mali_kutf_irq_test_main_init); -module_exit(mali_kutf_irq_test_main_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("ARM Ltd."); -MODULE_VERSION("1.0"); diff --git a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/sconscript b/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/sconscript deleted file mode 100755 index cefac0be51cb..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/mali_kutf_irq_test/sconscript +++ /dev/null @@ -1,36 +0,0 @@ -# -# (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -import os -Import('env') - -src = [Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/*.c'), Glob('#kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile')] - -if env.GetOption('clean') : - env.Execute(Action("make clean", '[CLEAN] mali_kutf_irq_test')) - cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, []) - env.KernelObjTarget('mali_kutf_irq_test', cmd) -else: - makeAction=Action("cd ${SOURCE.dir} && make MALI_UNIT_TEST=${unit} MALI_CUSTOMER_RELEASE=${release} MALI_USE_CSF=${csf} %s && ( ( [ -f mali_kutf_irq_test.ko ] && cp mali_kutf_irq_test.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/mali_kutf_irq_test.ko)" % env.kernel_get_config_defines(extra_cflags = ['-DCONFIG_MALI_IRQ_LATENCY'], extra_configs = ['CONFIG_MALI_IRQ_LATENCY=m']), '$MAKECOMSTR') - cmd = env.Command('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', src, [makeAction]) - env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/kutf.ko') - env.Depends('$STATIC_LIB_PATH/mali_kutf_irq_test.ko', '$STATIC_LIB_PATH/mali_kbase.ko') - env.KernelObjTarget('mali_kutf_irq_test', cmd) diff --git a/drivers/gpu/drm/bifrost/midgard/tests/sconscript b/drivers/gpu/drm/bifrost/midgard/tests/sconscript deleted file mode 100755 index ca64e8360955..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/tests/sconscript +++ /dev/null @@ -1,43 +0,0 @@ -# -# (C) COPYRIGHT 2010-2011, 2013, 2017-2018 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. 
-# -# SPDX-License-Identifier: GPL-2.0 -# -# - -Import ('env') - -kutf_env = env.Clone() -kutf_env.Append(CPPPATH = '#kernel/drivers/gpu/arm/midgard/tests/include') -Export('kutf_env') - -if Glob('internal/sconscript'): - SConscript('internal/sconscript') - -if kutf_env['debug'] == '1': - SConscript('kutf/sconscript') - SConscript('mali_kutf_irq_test/sconscript') - - if Glob('kutf_test/sconscript'): - SConscript('kutf_test/sconscript') - - if Glob('kutf_test_runner/sconscript'): - SConscript('kutf_test_runner/sconscript') - -if env['unit'] == '1': - SConscript('mali_kutf_ipa_unit_test/sconscript') - SConscript('mali_kutf_vinstr_test/sconscript') diff --git a/drivers/gpu/drm/bifrost/midgard/thirdparty/mali_kbase_mmap.c b/drivers/gpu/drm/bifrost/midgard/thirdparty/mali_kbase_mmap.c deleted file mode 100755 index 3aab51a173f0..000000000000 --- a/drivers/gpu/drm/bifrost/midgard/thirdparty/mali_kbase_mmap.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * - * (C) COPYRIGHT ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "linux/mman.h" -#include "../mali_kbase.h" - -/* mali_kbase_mmap.c - * - * This file contains Linux specific implementation of - * kbase_get_unmapped_area() interface. - */ - - -/** - * align_and_check() - Align the specified pointer to the provided alignment and - * check that it is still in range. 
- * @gap_end: Highest possible start address for allocation (end of gap in - * address space) - * @gap_start: Start address of current memory area / gap in address space - * @info: vm_unmapped_area_info structure passed to caller, containing - * alignment, length and limits for the allocation - * @is_shader_code: True if the allocation is for shader code (which has - * additional alignment requirements) - * @is_same_4gb_page: True if the allocation needs to reside completely within - * a 4GB chunk - * - * Return: true if gap_end is now aligned correctly and is still in range, - * false otherwise - */ -static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, - struct vm_unmapped_area_info *info, bool is_shader_code, - bool is_same_4gb_page) -{ - /* Compute highest gap address at the desired alignment */ - (*gap_end) -= info->length; - (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; - - if (is_shader_code) { - /* Check for 4GB boundary */ - if (0 == (*gap_end & BASE_MEM_MASK_4GB)) - (*gap_end) -= (info->align_offset ? info->align_offset : - info->length); - if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) - (*gap_end) -= (info->align_offset ? info->align_offset : - info->length); - - if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + - info->length) & BASE_MEM_MASK_4GB)) - return false; - } else if (is_same_4gb_page) { - unsigned long start = *gap_end; - unsigned long end = *gap_end + info->length; - unsigned long mask = ~((unsigned long)U32_MAX); - - /* Check if 4GB boundary is straddled */ - if ((start & mask) != ((end - 1) & mask)) { - unsigned long offset = end - (end & mask); - /* This is to ensure that alignment doesn't get - * disturbed in an attempt to prevent straddling at - * 4GB boundary. The GPU VA is aligned to 2MB when the - * allocation size is > 2MB and there is enough CPU & - * GPU virtual space. - */ - unsigned long rounded_offset = - ALIGN(offset, info->align_mask + 1); - - start -= rounded_offset; - end -= rounded_offset; - - *gap_end = start; - - /* The preceding 4GB boundary shall not get straddled, - * even after accounting for the alignment, as the - * size of allocation is limited to 4GB and the initial - * start location was already aligned. - */ - WARN_ON((start & mask) != ((end - 1) & mask)); - } - } - - - if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) - return false; - - - return true; -} - -/** - * kbase_unmapped_area_topdown() - allocates new areas top-down from - * below the stack limit. - * @info: Information about the memory area to allocate. - * @is_shader_code: Boolean which denotes whether the allocated area is - * intended for the use by shader core in which case a - * special alignment requirements apply. - * @is_same_4gb_page: Boolean which indicates whether the allocated area needs - * to reside completely within a 4GB chunk. - * - * The unmapped_area_topdown() function in the Linux kernel is not exported - * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a - * module and also make use of the fact that some of the requirements for - * the unmapped area are known in advance, we implemented an extended version - * of this function and prefixed it with 'kbase_'. 
- * - * The difference in the call parameter list comes from the fact that - * kbase_unmapped_area_topdown() is called with additional parameters which - * are provided to indicate whether the allocation is for a shader core memory, - * which has additional alignment requirements, and whether the allocation can - * straddle a 4GB boundary. - * - * The modification of the original Linux function lies in how the computation - * of the highest gap address at the desired alignment is performed once the - * gap with desirable properties is found. For this purpose a special function - * is introduced (@ref align_and_check()) which beside computing the gap end - * at the desired alignment also performs additional alignment checks for the - * case when the memory is executable shader core memory, for which it is - * ensured that the gap does not end on a 4GB boundary, and for the case when - * memory needs to be confined within a 4GB chunk. - * - * Return: address of the found gap end (high limit) if area is found; - * -ENOMEM if search is unsuccessful -*/ - -static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info - *info, bool is_shader_code, bool is_same_4gb_page) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long length, low_limit, high_limit, gap_start, gap_end; - - /* Adjust search length to account for worst case alignment overhead */ - length = info->length + info->align_mask; - if (length < info->length) - return -ENOMEM; - - /* - * Adjust search limits by the desired length. - * See implementation comment at top of unmapped_area(). - */ - gap_end = info->high_limit; - if (gap_end < length) - return -ENOMEM; - high_limit = gap_end - length; - - if (info->low_limit > high_limit) - return -ENOMEM; - low_limit = info->low_limit + length; - - /* Check highest gap, which does not precede any rbtree node */ - gap_start = mm->highest_vm_end; - if (gap_start <= high_limit) { - if (align_and_check(&gap_end, gap_start, info, - is_shader_code, is_same_4gb_page)) - return gap_end; - } - - /* Check if rbtree root looks promising */ - if (RB_EMPTY_ROOT(&mm->mm_rb)) - return -ENOMEM; - vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); - if (vma->rb_subtree_gap < length) - return -ENOMEM; - - while (true) { - /* Visit right subtree if it looks promising */ - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; - if (gap_start <= high_limit && vma->vm_rb.rb_right) { - struct vm_area_struct *right = - rb_entry(vma->vm_rb.rb_right, - struct vm_area_struct, vm_rb); - if (right->rb_subtree_gap >= length) { - vma = right; - continue; - } - } - -check_current: - /* Check if current node has a suitable gap */ - gap_end = vma->vm_start; - if (gap_end < low_limit) - return -ENOMEM; - if (gap_start <= high_limit && gap_end - gap_start >= length) { - /* We found a suitable gap. Clip it with the original - * high_limit. 
*/ - if (gap_end > info->high_limit) - gap_end = info->high_limit; - - if (align_and_check(&gap_end, gap_start, info, - is_shader_code, is_same_4gb_page)) - return gap_end; - } - - /* Visit left subtree if it looks promising */ - if (vma->vm_rb.rb_left) { - struct vm_area_struct *left = - rb_entry(vma->vm_rb.rb_left, - struct vm_area_struct, vm_rb); - if (left->rb_subtree_gap >= length) { - vma = left; - continue; - } - } - - /* Go back up the rbtree to find next candidate node */ - while (true) { - struct rb_node *prev = &vma->vm_rb; - - if (!rb_parent(prev)) - return -ENOMEM; - vma = rb_entry(rb_parent(prev), - struct vm_area_struct, vm_rb); - if (prev == vma->vm_rb.rb_right) { - gap_start = vma->vm_prev ? - vma->vm_prev->vm_end : 0; - goto check_current; - } - } - } - - return -ENOMEM; -} - - -/* This function is based on Linux kernel's arch_get_unmapped_area, but - * simplified slightly. Modifications come from the fact that some values - * about the memory area are known in advance. - */ -unsigned long kbase_get_unmapped_area(struct file *filp, - const unsigned long addr, const unsigned long len, - const unsigned long pgoff, const unsigned long flags) -{ - struct kbase_context *kctx = filp->private_data; - struct mm_struct *mm = current->mm; - struct vm_unmapped_area_info info; - unsigned long align_offset = 0; - unsigned long align_mask = 0; - unsigned long high_limit = mm->mmap_base; - unsigned long low_limit = PAGE_SIZE; - int cpu_va_bits = BITS_PER_LONG; - int gpu_pc_bits = - kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; - bool is_shader_code = false; - bool is_same_4gb_page = false; - unsigned long ret; - - /* err on fixed address */ - if ((flags & MAP_FIXED) || addr) - return -EINVAL; - -#ifdef CONFIG_64BIT - /* too big? */ - if (len > TASK_SIZE - SZ_2M) - return -ENOMEM; - - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - - high_limit = min_t(unsigned long, mm->mmap_base, - (kctx->same_va_end << PAGE_SHIFT)); - - /* If there's enough (> 33 bits) of GPU VA space, align - * to 2MB boundaries. 
- */ - if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { - if (len >= SZ_2M) { - align_offset = SZ_2M; - align_mask = SZ_2M - 1; - } - } - - low_limit = SZ_2M; - } else { - cpu_va_bits = 32; - } -#endif /* CONFIG_64BIT */ - if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && - (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { - int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); - struct kbase_va_region *reg = - kctx->pending_regions[cookie]; - - if (!reg) - return -EINVAL; - - if (!(reg->flags & KBASE_REG_GPU_NX)) { - if (cpu_va_bits > gpu_pc_bits) { - align_offset = 1ULL << gpu_pc_bits; - align_mask = align_offset - 1; - is_shader_code = true; - } - } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { - unsigned long extent_bytes = - (unsigned long)(reg->extent << PAGE_SHIFT); - /* kbase_check_alloc_sizes() already satisfies - * these checks, but they're here to avoid - * maintenance hazards due to the assumptions - * involved */ - WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT)); - WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); - WARN_ON(!is_power_of_2(extent_bytes)); - align_mask = extent_bytes - 1; - align_offset = - extent_bytes - (reg->initial_commit << PAGE_SHIFT); - } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { - is_same_4gb_page = true; - } -#ifndef CONFIG_64BIT - } else { - return current->mm->get_unmapped_area(filp, addr, len, pgoff, - flags); -#endif - } - - info.flags = 0; - info.length = len; - info.low_limit = low_limit; - info.high_limit = high_limit; - info.align_offset = align_offset; - info.align_mask = align_mask; - - ret = kbase_unmapped_area_topdown(&info, is_shader_code, - is_same_4gb_page); - - if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && - high_limit < (kctx->same_va_end << PAGE_SHIFT)) { - /* Retry above mmap_base */ - info.low_limit = mm->mmap_base; - info.high_limit = min_t(u64, TASK_SIZE, - (kctx->same_va_end << PAGE_SHIFT)); - - ret = kbase_unmapped_area_topdown(&info, is_shader_code, - is_same_4gb_page); - } - - return ret; -} diff --git a/drivers/gpu/drm/bifrost/sconscript b/drivers/gpu/drm/bifrost/sconscript deleted file mode 100755 index dd02acd2a20e..000000000000 --- a/drivers/gpu/drm/bifrost/sconscript +++ /dev/null @@ -1,26 +0,0 @@ -# -# (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. -# -# This program is free software and is provided to you under the terms of the -# GNU General Public License version 2 as published by the Free Software -# Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# SPDX-License-Identifier: GPL-2.0 -# -# - -import glob - - -SConscript('midgard/sconscript') -