git.baikalelectronics.ru Git - kernel.git/commitdiff
arm64/sve: Write ZCR_EL1 on context switch only if changed
author: Dave Martin <Dave.Martin@arm.com>
Mon, 14 May 2018 17:51:09 +0000 (18:51 +0100)
committer: Catalin Marinas <catalin.marinas@arm.com>
Thu, 17 May 2018 17:19:53 +0000 (18:19 +0100)
Writes to ZCR_EL1 are self-synchronising, and so may be expensive
in typical implementations.

This patch adopts the approach used for costly system register
writes elsewhere in the kernel: the system register write is
suppressed if it would not change the stored value.

Since the common case will be that of switching between tasks that
use the same vector length as one another, prediction hit rates on
the conditional branch should be reasonably good, with lower
expected amortised cost than the unconditional execution of a
heavyweight self-synchronising instruction.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
arch/arm64/include/asm/fpsimdmacros.h
arch/arm64/kernel/entry-fpsimd.S

index e050d765ca9e11a60d0b381f7c8463fd5011672a..46843515d77b07eea027209c94443d70c8d93960 100644 (file)
                str             w\nxtmp, [\xpfpsr, #4]
 .endm
 
-.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp
+.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
                mrs_s           x\nxtmp, SYS_ZCR_EL1
-               bic             x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK
-               orr             x\nxtmp, x\nxtmp, \xvqminus1
-               msr_s           SYS_ZCR_EL1, x\nxtmp    // self-synchronising
-
+               bic             \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK
+               orr             \xtmp2, \xtmp2, \xvqminus1
+               cmp             \xtmp2, x\nxtmp
+               b.eq            921f
+               msr_s           SYS_ZCR_EL1, \xtmp2     // self-synchronising
+921:
  _for n, 0, 31,        _sve_ldr_v      \n, \nxbase, \n - 34
                _sve_ldr_p      0, \nxbase
                _sve_wrffr      0
index 73f17bffcd2306d21f68c6ce2b468c047845bef2..12d4958e6429f28ed5c07ad8347bbca53a24f49d 100644 (file)
@@ -49,7 +49,7 @@ ENTRY(sve_save_state)
 ENDPROC(sve_save_state)
 
 ENTRY(sve_load_state)
-       sve_load 0, x1, x2, 3
+       sve_load 0, x1, x2, 3, x4
        ret
 ENDPROC(sve_load_state)