From: Jayanth Dodderi Chidanand Date: Tue, 8 Nov 2022 10:31:07 +0000 (+0000) Subject: feat(sme): enable SME2 functionality for NS world X-Git-Tag: baikal/aarch64/sdk5.10~1^2~55^2 X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=03d3c0d729e24713d657209bedf74d255550babb;p=arm-tf.git feat(sme): enable SME2 functionality for NS world FEAT_SME2 is an extension of FEAT_SME and an optional feature from v9.2. It is an extension of SME, wherein it not only processes matrix operations efficiently, but also provides outer-product instructions to accelerate matrix operations. It affords instructions for multi-vector operations. Further, it adds a 512-bit architectural register ZT0. This patch implements all the changes introduced with FEAT_SME2 to ensure that the instructions are allowed to access the ZT0 register from Non-secure lower exception levels. Additionally, it adds support to ensure FEAT_SME2 is aligned with the existing FEATURE DETECTION mechanism, and is documented. Change-Id: Iee0f61943304a9cfc3db8f986047b1321d0a6463 Signed-off-by: Jayanth Dodderi Chidanand --- diff --git a/Makefile b/Makefile index 5306ddfe5..892c2c9e8 100644 --- a/Makefile +++ b/Makefile @@ -861,8 +861,12 @@ ifeq ($(FEATURE_DETECTION),1) $(info FEATURE_DETECTION is an experimental feature) endif -ifneq ($(ENABLE_SME_FOR_NS), 0) - $(info ENABLE_SME_FOR_NS is an experimental feature) +ifneq ($(ENABLE_SME2_FOR_NS), 0) + ifeq (${ENABLE_SME_FOR_NS}, 0) + $(warning "ENABLE_SME2_FOR_NS requires ENABLE_SME_FOR_NS also to be set") + $(warning "Forced ENABLE_SME_FOR_NS=1") + override ENABLE_SME_FOR_NS := 1 + endif endif ifeq (${ARM_XLAT_TABLES_LIB_V1}, 1) @@ -884,6 +888,7 @@ ifeq (${ARCH},aarch32) ifneq (${ENABLE_SME_FOR_NS},0) $(error "ENABLE_SME_FOR_NS cannot be used with ARCH=aarch32") endif + ifeq (${ENABLE_SVE_FOR_NS},1) # Warning instead of error due to CI dependency on this $(error "ENABLE_SVE_FOR_NS cannot be used with ARCH=aarch32") @@ -925,6 +930,7 @@ ifeq (${CTX_INCLUDE_FPREGS},1) 
ifneq (${ENABLE_SME_FOR_NS},0) $(error "ENABLE_SME_FOR_NS cannot be used with CTX_INCLUDE_FPREGS") endif + ifeq (${ENABLE_SVE_FOR_NS},1) # Warning instead of error due to CI dependency on this $(warning "ENABLE_SVE_FOR_NS cannot be used with CTX_INCLUDE_FPREGS") @@ -1196,6 +1202,7 @@ $(eval $(call assert_numerics,\ ENABLE_SPE_FOR_NS \ ENABLE_SYS_REG_TRACE_FOR_NS \ ENABLE_SME_FOR_NS \ + ENABLE_SME2_FOR_NS \ ENABLE_SVE_FOR_NS \ ENABLE_TRF_FOR_NS \ FW_ENC_STATUS \ @@ -1251,6 +1258,7 @@ $(eval $(call add_defines,\ ENABLE_RME \ ENABLE_RUNTIME_INSTRUMENTATION \ ENABLE_SME_FOR_NS \ + ENABLE_SME2_FOR_NS \ ENABLE_SME_FOR_SWD \ ENABLE_SPE_FOR_NS \ ENABLE_SVE_FOR_NS \ diff --git a/changelog.yaml b/changelog.yaml index d54c62b04..6dbb9b20d 100644 --- a/changelog.yaml +++ b/changelog.yaml @@ -128,7 +128,7 @@ subsections: - title: Trapping support for RNDR/RNDRRS (FEAT_RNG_TRAP) scope: rng-trap - - title: Scalable Matrix Extension (FEAT_SME) + - title: Scalable Matrix Extension (FEAT_SME, FEAT_SME2) scope: sme - title: Statistical profiling Extension (FEAT_SPE) diff --git a/common/feat_detect.c b/common/feat_detect.c index c8a070392..eb4db95a0 100644 --- a/common/feat_detect.c +++ b/common/feat_detect.c @@ -218,6 +218,8 @@ void detect_arch_features(void) /* v9.2 features */ check_feature(ENABLE_SME_FOR_NS, read_feat_sme_id_field(), "SME", 1, 2); + check_feature(ENABLE_SME2_FOR_NS, read_feat_sme_id_field(), + "SME2", 2, 2); /* v9.4 features */ check_feature(ENABLE_FEAT_GCS, read_feat_gcs_id_field(), "GCS", 1, 1); diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst index d2f463fab..2229591b5 100644 --- a/docs/getting_started/build-options.rst +++ b/docs/getting_started/build-options.rst @@ -442,6 +442,13 @@ Common build options build with these options will fail. This flag can take the values 0 to 2, to align with the ``FEATURE_DETECTION`` mechanism. Default is 0. 
+- ``ENABLE_SME2_FOR_NS``: Numeric value to enable Scalable Matrix Extension + version 2 (SME2) for the non-secure world only. SME2 is an optional + architectural feature for AArch64 and TF-A support is experimental. + This should be set along with ENABLE_SME_FOR_NS=1, if not, the default SME + accesses will still be trapped. This flag can take the values 0 to 2, to + align with the ``FEATURE_DETECTION`` mechanism. Default is 0. + - ``ENABLE_SME_FOR_SWD``: Boolean option to enable the Scalable Matrix Extension for secure world use along with SVE and FPU/SIMD, ENABLE_SME_FOR_NS must also be set to use this. If enabling this, the secure world MUST diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h index 9e061bfb4..ac5eae249 100644 --- a/include/arch/aarch64/arch.h +++ b/include/arch/aarch64/arch.h @@ -412,6 +412,7 @@ #define ID_AA64PFR1_EL1_SME_MASK ULL(0xf) #define ID_AA64PFR1_EL1_SME_NOT_SUPPORTED ULL(0x0) #define ID_AA64PFR1_EL1_SME_SUPPORTED ULL(0x1) +#define ID_AA64PFR1_EL1_SME2_SUPPORTED ULL(0x2) /* ID_PFR1_EL1 definitions */ #define ID_PFR1_VIRTEXT_SHIFT U(12) @@ -519,6 +520,8 @@ #define CPACR_EL1_FP_TRAP_EL0 UL(0x1) #define CPACR_EL1_FP_TRAP_ALL UL(0x2) #define CPACR_EL1_FP_TRAP_NONE UL(0x3) +#define CPACR_EL1_SMEN_SHIFT U(24) +#define CPACR_EL1_SMEN_MASK ULL(0x3) /* SCR definitions */ #define SCR_RES1_BITS ((U(1) << 4) | (U(1) << 5)) @@ -1026,11 +1029,16 @@ #define ID_AA64SMFR0_EL1_SME_FA64_SHIFT U(63) #define ID_AA64SMFR0_EL1_SME_FA64_MASK U(0x1) #define ID_AA64SMFR0_EL1_SME_FA64_SUPPORTED U(0x1) +#define ID_AA64SMFR0_EL1_SME_VER_SHIFT U(55) +#define ID_AA64SMFR0_EL1_SME_VER_MASK ULL(0xf) +#define ID_AA64SMFR0_EL1_SME_INST_SUPPORTED ULL(0x0) +#define ID_AA64SMFR0_EL1_SME2_INST_SUPPORTED ULL(0x1) /* SMCR_ELx definitions */ #define SMCR_ELX_LEN_SHIFT U(0) -#define SMCR_ELX_LEN_MASK U(0x1ff) +#define SMCR_ELX_LEN_MAX U(0x1ff) #define SMCR_ELX_FA64_BIT (U(1) << 31) +#define SMCR_ELX_EZT0_BIT (U(1) << 30) 
/******************************************************************************* * Definitions of MAIR encodings for device and normal memory diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h index 40ab82fb0..a0141defa 100644 --- a/include/arch/aarch64/arch_features.h +++ b/include/arch/aarch64/arch_features.h @@ -657,4 +657,17 @@ static inline bool is_feat_sme_supported(void) return read_feat_sme_id_field() >= ID_AA64PFR1_EL1_SME_SUPPORTED; } +static inline bool is_feat_sme2_supported(void) +{ + if (ENABLE_SME2_FOR_NS == FEAT_STATE_DISABLED) { + return false; + } + + if (ENABLE_SME2_FOR_NS == FEAT_STATE_ALWAYS) { + return true; + } + + return read_feat_sme_id_field() >= ID_AA64PFR1_EL1_SME2_SUPPORTED; +} + #endif /* ARCH_FEATURES_H */ diff --git a/lib/extensions/sme/sme.c b/lib/extensions/sme/sme.c index 29034fdc4..f888d12c4 100644 --- a/lib/extensions/sme/sme.c +++ b/lib/extensions/sme/sme.c @@ -43,11 +43,23 @@ void sme_enable(cpu_context_t *context) * to be the least restrictive, then lower ELs can restrict as needed * using SMCR_EL2 and SMCR_EL1. */ - reg = SMCR_ELX_LEN_MASK; + reg = SMCR_ELX_LEN_MAX; + if (read_feat_sme_fa64_id_field() != 0U) { VERBOSE("[SME] FA64 enabled\n"); reg |= SMCR_ELX_FA64_BIT; } + + /* + * Enable access to ZT0 register. + * Make sure FEAT_SME2 is supported by the hardware before continuing. + * If supported, Set the EZT0 bit in SMCR_EL3 to allow instructions to + * access ZT0 register without trapping. + */ + if (is_feat_sme2_supported()) { + VERBOSE("SME2 enabled\n"); + reg |= SMCR_ELX_EZT0_BIT; + } write_smcr_el3(reg); /* Reset CPTR_EL3 value. 
*/ diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk index 34a9bc6b9..021893c58 100644 --- a/make_helpers/defaults.mk +++ b/make_helpers/defaults.mk @@ -405,6 +405,9 @@ ifneq (${ENABLE_SME_FOR_NS},0) override ENABLE_SVE_FOR_SWD := 0 endif +# SME2 defaults to disabled +ENABLE_SME2_FOR_NS := 0 + SANITIZE_UB := off # For ARMv8.1 (AArch64) platforms, enabling this option selects the spinlock diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk index 214064bf1..26843e407 100644 --- a/plat/arm/board/fvp/platform.mk +++ b/plat/arm/board/fvp/platform.mk @@ -55,6 +55,7 @@ ifeq (${SPM_MM}, 0) ifeq (${ENABLE_RME}, 0) ifeq (${CTX_INCLUDE_FPREGS}, 0) ENABLE_SME_FOR_NS := 2 + ENABLE_SME2_FOR_NS := 2 endif endif endif