cpus: higher performance non-cacheable load forwarding

The CPUACTLR_EL1 register on Cortex-A57 CPUs supports a bit to enable non-cacheable streaming enhancement. Platforms can set this bit only if their memory system meets the requirement that cache line fill requests from the Cortex-A57 processor are atomic. This patch adds support to enable higher performance non-cacheable load forwarding for such platforms. Platforms must enable this support by setting the 'A57_ENABLE_NONCACHEABLE_LOAD_FWD' flag from their makefiles. This flag is disabled by default. Change-Id: Ib27e55dd68d11a50962c0bbc5b89072208b4bac5 Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
author: Varun Wadekar <vwadekar@nvidia.com> 2018-06-12 16:49:12 -0700
committer: Varun Wadekar <vwadekar@nvidia.com> 2020-02-20 09:25:45 -0800
commit: cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64 (patch)
tree: d16fbe9ae43976a60290fef60aaf78e51108fcdb
parent: eda880ff8ec77ee429f5249f08571c41232b27db (diff)
download: platform_external_arm-trusted-firmware-cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64.tar.gz
platform_external_arm-trusted-firmware-cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64.tar.bz2
platform_external_arm-trusted-firmware-cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64.zip
4 files changed, 30 insertions, 0 deletions
diff --git a/docs/design/cpu-specific-build-macros.rst b/docs/design/cpu-specific-build-macros.rst
index f3096b418..258f73d0b 100644
--- a/docs/design/cpu-specific-build-macros.rst
+++ b/docs/design/cpu-specific-build-macros.rst
@@ -324,6 +324,13 @@ architecture that can be enabled by the platform as desired.
    as recommended in section "4.7 Non-Temporal Loads/Stores" of the
    `Cortex-A57 Software Optimization Guide`_.
 
+-  ``A57_ENABLE_NONCACHEABLE_LOAD_FWD``: This flag enables the non-cacheable
+   streaming enhancement feature for Cortex-A57 CPUs. Platforms can set
+   this flag only if their memory system meets the requirement that cache
+   line fill requests from the Cortex-A57 processor are atomic. Each
+   Cortex-A57 based platform must make its own decision on whether to use
+   the optimization. This flag is disabled by default.
+
 -  ``NEOVERSE_N1_EXTERNAL_LLC``: This flag indicates that an external last
    level cache(LLC) is present in the system, and that the DataSource field
    on the master CHI interface indicates when data is returned from the LLC.
diff --git a/include/lib/cpus/aarch64/cortex_a57.h b/include/lib/cpus/aarch64/cortex_a57.h
index 102ff60c3..dc40e31ad 100644
--- a/include/lib/cpus/aarch64/cortex_a57.h
+++ b/include/lib/cpus/aarch64/cortex_a57.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -54,6 +55,7 @@
 #define CORTEX_A57_CPUACTLR_EL1_FORCE_FPSCR_FLUSH	(ULL(1) << 38)
 #define CORTEX_A57_CPUACTLR_EL1_DIS_INSTR_PREFETCH	(ULL(1) << 32)
 #define CORTEX_A57_CPUACTLR_EL1_DIS_STREAMING		(ULL(3) << 27)
+#define CORTEX_A57_CPUACTLR_EL1_EN_NC_LOAD_FWD		(ULL(1) << 24)
 #define CORTEX_A57_CPUACTLR_EL1_DIS_L1_STREAMING	(ULL(3) << 25)
 #define CORTEX_A57_CPUACTLR_EL1_DIS_INDIRECT_PREDICTOR	(ULL(1) << 4)
 
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index dd03c0f02..3fee4704e 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -469,6 +470,17 @@ func cortex_a57_reset_func
 	dsb	sy
 #endif
 
+#if A57_ENABLE_NONCACHEABLE_LOAD_FWD
+	/* ---------------------------------------------
+	 * Enable higher performance non-cacheable load
+	 * forwarding
+	 * ---------------------------------------------
+	 */
+	mrs	x0, CORTEX_A57_CPUACTLR_EL1
+	orr	x0, x0, #CORTEX_A57_CPUACTLR_EL1_EN_NC_LOAD_FWD
+	msr	CORTEX_A57_CPUACTLR_EL1, x0
+#endif
+
 	/* ---------------------------------------------
 	 * Enable the SMP bit.
 	 * ---------------------------------------------
diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk
index e3bfc2f2e..3c0c9cd13 100644
--- a/lib/cpus/cpu-ops.mk
+++ b/lib/cpus/cpu-ops.mk
@@ -1,5 +1,6 @@
 #
 # Copyright (c) 2014-2020, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
@@ -16,6 +17,10 @@ A53_DISABLE_NON_TEMPORAL_HINT	?=1
 # It is enabled by default.
 A57_DISABLE_NON_TEMPORAL_HINT	?=1
 
+# Flag to enable higher performance non-cacheable load forwarding.
+# It is disabled by default.
+A57_ENABLE_NONCACHEABLE_LOAD_FWD	?= 0
+
 WORKAROUND_CVE_2017_5715	?=1
 WORKAROUND_CVE_2018_3639	?=1
 DYNAMIC_WORKAROUND_CVE_2018_3639	?=0
@@ -24,6 +29,10 @@ DYNAMIC_WORKAROUND_CVE_2018_3639	?=0
 # By default internal
 NEOVERSE_N1_EXTERNAL_LLC	?=0
 
+# Process A57_ENABLE_NONCACHEABLE_LOAD_FWD flag
+$(eval $(call assert_boolean,A57_ENABLE_NONCACHEABLE_LOAD_FWD))
+$(eval $(call add_define,A57_ENABLE_NONCACHEABLE_LOAD_FWD))
+
 # Process SKIP_A57_L1_FLUSH_PWR_DWN flag
 $(eval $(call assert_boolean,SKIP_A57_L1_FLUSH_PWR_DWN))
 $(eval $(call add_define,SKIP_A57_L1_FLUSH_PWR_DWN))
author	Varun Wadekar <vwadekar@nvidia.com>	2018-06-12 16:49:12 -0700
committer	Varun Wadekar <vwadekar@nvidia.com>	2020-02-20 09:25:45 -0800
commit	cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64 (patch)
tree	d16fbe9ae43976a60290fef60aaf78e51108fcdb
parent	eda880ff8ec77ee429f5249f08571c41232b27db (diff)
download	platform_external_arm-trusted-firmware-cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64.tar.gz platform_external_arm-trusted-firmware-cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64.tar.bz2 platform_external_arm-trusted-firmware-cd0ea1842f7ef5f3c8ccc3205cc0f3840f573f64.zip