From 6bda667037764e116d7e43654522945f3822a14e Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Tue, 15 Jun 2021 19:17:36 +0200 Subject: [PATCH] s390/boot: move dma sections from decompressor to decompressed kernel This change simplifies the task of making the decompressor relocatable. The decompressor's image contains special DMA sections between _sdma and _edma. This DMA segment is loaded at boot as part of the decompressor and then simply handed over to the decompressed kernel. The decompressor itself never uses it in any way. The primary reason for this is the need to keep the aforementioned DMA segment below 2GB which is required by architecture, and because the decompressor is always loaded at a fixed low physical address, it is guaranteed that the DMA region will not cross the 2GB memory limit. If the DMA region had been placed in the decompressed kernel, then KASLR would make this guarantee impossible to fulfill or it would be restricted to the first 2GB of memory address space. This commit moves all DMA sections between _sdma and _edma from the decompressor's image to the decompressed kernel's image. The complete DMA region is placed in the init section of the decompressed kernel and immediately relocated below 2GB at start-up before it is needed by other parts of the decompressed kernel. The relocation of the DMA region happens even if the decompressed kernel is already located below 2GB in order to keep the first implementation simple. The relocation should not have any noticeable impact on boot time because the DMA segment is only a couple of pages. After relocating the DMA sections, the kernel has to fix all references which point into it. In order to automate this, place all variables pointing into the DMA sections in a special .dma.refs section. All such variables must be defined using the new __dma_ref macro. Only variables containing addresses within the DMA sections must be placed in the new .dma.refs section. Furthermore, move the initialization of control registers from the decompressor to the decompressed kernel because some control registers reference tables that must be placed in the DMA data section to guarantee that their addresses are below 2G. Because the decompressed kernel relocates the DMA sections at startup, the content of control registers CR2, CR5 and CR15 must be updated with new addresses after the relocation. The decompressed kernel initializes all control registers early at boot and then updates the content of CR2, CR5 and CR15 as soon as the DMA relocation has occurred. This practically reverts the commit a80313ff91ab ("s390/kernel: introduce .dma sections"). Signed-off-by: Alexander Egorenkov Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/boot.h | 4 - arch/s390/boot/compressed/vmlinux.lds.S | 23 ----- arch/s390/boot/head.S | 30 ------- arch/s390/boot/startup.c | 31 ------- arch/s390/include/asm/diag.h | 8 ++ arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/diag.c | 13 ++- arch/s390/kernel/entry.h | 9 ++ arch/s390/kernel/head64.S | 17 ++++ arch/s390/kernel/setup.c | 110 ++++++++++++++++++++++-- arch/s390/{boot => kernel}/text_dma.S | 0 arch/s390/kernel/vmlinux.lds.S | 33 +++++++ 13 files changed, 183 insertions(+), 99 deletions(-) rename arch/s390/{boot => kernel}/text_dma.S (100%) diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 41a64b8dce252..48cec16628e58 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -36,7 +36,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o text_dma.o +obj-y += version.o pgm_check_info.o ctype.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 716c35c1d78f2..641ce0fc5c3eb 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -26,10 +26,6 @@ extern int vmalloc_size_set; extern int kaslr_enabled; extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; -extern char _sdma[], _edma[]; -extern char _stext_dma[], _etext_dma[]; -extern struct exception_table_entry _start_dma_ex_table[]; -extern struct exception_table_entry _stop_dma_ex_table[]; extern char _decompressor_syms_start[], _decompressor_syms_end[]; extern char _stack_start[], _stack_end[]; diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index d6a69aa6e9370..918e05137d4c6 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -37,29 +37,6 @@ SECTIONS *(.data.*) _edata = . ; } - /* - * .dma section for code, data, ex_table that need to stay below 2 GB, - * even when the kernel is relocate: above 2 GB. - */ - . = ALIGN(PAGE_SIZE); - _sdma = .; - .dma.text : { - _stext_dma = .; - *(.dma.text) - . = ALIGN(PAGE_SIZE); - _etext_dma = .; - } - . = ALIGN(16); - .dma.ex_table : { - _start_dma_ex_table = .; - KEEP(*(.dma.ex_table)) - _stop_dma_ex_table = .; - } - .dma.data : { - *(.dma.data) - } - . = ALIGN(PAGE_SIZE); - _edma = .; BOOT_DATA BOOT_DATA_PRESERVED diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 63a139a2f503f..40f4cff538b8d 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -317,7 +317,6 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0x300(256),0x300 xc 0xe00(256),0xe00 xc 0xf00(256),0xf00 - lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers stcke __LC_BOOT_CLOCK mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 spt 6f-.LPG0(%r13) @@ -336,35 +335,6 @@ SYM_CODE_END(startup_normal) .quad 0x0000000180000000,startup_pgm_check_handler .Lio_new_psw: .quad 0x0002000180000000,0x1f0 # disabled wait -.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space - .quad 0 # cr1: primary space segment table - .quad .Lduct # cr2: dispatchable unit control table - .quad 0 # cr3: instruction authorization - .quad 0xffff # cr4: instruction authorization - .quad .Lduct # cr5: primary-aste origin - .quad 0 # cr6: I/O interrupts - .quad 0 # cr7: secondary space segment table - .quad 0x0000000000008000 # cr8: access registers translation - .quad 0 # cr9: tracing off - .quad 0 # cr10: tracing off - .quad 0 # cr11: tracing off - .quad 0 # cr12: tracing off - .quad 0 # cr13: home space segment table - .quad 0xc0000000 # cr14: machine check handling off - .quad .Llinkage_stack # cr15: linkage stack operations - - .section .dma.data,"aw",@progbits -.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 - .long 0,0,0,0,0,0,0,0 -.Llinkage_stack: - .long 0,0,0x89000000,0,0,0,0x8a000000,0 - .align 64 -.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 - .align 128 -.Lduald:.rept 8 - .long 0x80000000,0,0,0 # invalid access-list entries - .endr - .previous #include "head_kdump.S" diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6206ca149d5ec..80c9ac71dea36 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -29,37 +29,6 @@ u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); -/* - * Some code and data needs to stay below 2 GB, even when the kernel would be - * relocated above 2 GB, because it has to use 31 bit addresses. - * Such code and data is part of the .dma section, and its location is passed - * over to the decompressed / relocated kernel via the .boot.preserved.data - * section. - */ -unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma); -unsigned long __bootdata_preserved(__edma) = __pa(&_edma); -unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma); -unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma); -struct exception_table_entry * - __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table; -struct exception_table_entry * - __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table; - -int _diag210_dma(struct diag210 *addr); -int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode); -int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode); -void _diag0c_dma(struct hypfs_diag0c_entry *entry); -void _diag308_reset_dma(void); -struct diag_ops __bootdata_preserved(diag_dma_ops) = { - .diag210 = _diag210_dma, - .diag26c = _diag26c_dma, - .diag14 = _diag14_dma, - .diag0c = _diag0c_dma, - .diag308_reset = _diag308_reset_dma -}; -static struct diag210 _diag210_tmp_dma __section(".dma.data"); -struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; - void error(char *x) { sclp_early_printk("\n\n"); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index ca8f85b53a902..0e943d753fcc0 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -309,6 +309,7 @@ int diag26c(void *req, void *resp, enum diag26c_sc subcode); struct hypfs_diag0c_entry; +/* This struct must contain only pointers/references into the text DMA section. */ struct diag_ops { int (*diag210)(struct diag210 *addr); int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode); @@ -319,4 +320,11 @@ struct diag_ops { extern struct diag_ops diag_dma_ops; extern struct diag210 *__diag210_tmp_dma; + +int _diag210_dma(struct diag210 *addr); +int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode); +int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode); +void _diag0c_dma(struct hypfs_diag0c_entry *entry); +void _diag308_reset_dma(void); + #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4a44ba5a2d734..389a3d7690c47 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o +obj-y += smp.o text_dma.o extra-y += head64.o vmlinux.lds diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index a3f47464c3f17..1e745c1a57866 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -14,6 +14,7 @@ #include #include #include +#include "entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -50,8 +51,16 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; -struct diag_ops __bootdata_preserved(diag_dma_ops); -struct diag210 *__bootdata_preserved(__diag210_tmp_dma); +struct diag_ops __dma_ref diag_dma_ops = { + .diag210 = _diag210_dma, + .diag26c = _diag26c_dma, + .diag14 = _diag14_dma, + .diag0c = _diag0c_dma, + .diag308_reset = _diag308_reset_dma +}; + +static struct diag210 _diag210_tmp_dma __section(".dma.data"); +struct diag210 __dma_ref *__diag210_tmp_dma = &_diag210_tmp_dma; static int show_diag_stat(struct seq_file *m, void *v) { diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1ab33465382ff..80ef613815df3 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -64,4 +64,13 @@ void stack_free(unsigned long stack); extern char kprobes_insn_page[]; +extern char _sdma[], _edma[]; +extern char _stext_dma[], _etext_dma[]; +extern struct exception_table_entry _start_dma_ex_table[]; +extern struct exception_table_entry _stop_dma_ex_table[]; + +#define __dma_data __section(".dma.data") +#define __dma_ref __section(".dma.refs") +extern long _start_dma_refs[], _end_dma_refs[]; + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 0c253886da789..114b5490ad8eb 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -21,6 +21,7 @@ ENTRY(startup_continue) larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base + lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers # # Setup stack # @@ -41,3 +42,19 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad 0 # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0xffff # cr4: instruction authorization + .quad 0 # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0x0000000000008000 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad 0 # cr15: linkage stack operations diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d4b47de1110f7..32d1324723f2f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -94,17 +94,64 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .dma section. + */ +unsigned long __dma_ref __sdma = __pa(&_sdma); +unsigned long __dma_ref __edma = __pa(&_edma); +unsigned long __dma_ref __stext_dma = __pa(&_stext_dma); +unsigned long __dma_ref __etext_dma = __pa(&_etext_dma); +struct exception_table_entry __dma_ref *__start_dma_ex_table = _start_dma_ex_table; +struct exception_table_entry __dma_ref *__stop_dma_ex_table = _stop_dma_ex_table; + +/* + * Control registers CR2, CR5 and CR15 are initialized with addresses + * of tables that must be placed below 2G which is handled by the DMA + * sections. + * Because the DMA sections are relocated below 2G at startup, + * the content of control registers CR2, CR5 and CR15 must be updated + * with new addresses after the relocation. The initial initialization of + * control registers occurs in head64.S and then gets updated again after DMA + * relocation. We must access the relevant DMA tables indirectly via + * pointers placed in the .dma.refs linker section. Those pointers get + * updated automatically during DMA relocation and always contain a valid + * address within DMA sections. + */ + +static __dma_data u32 __ctl_duct_dma[16] __aligned(64); + +static __dma_data u64 __ctl_aste_dma[8] __aligned(64) = { + [1] = 0xffffffffffffffff +}; + +static __dma_data u32 __ctl_duald_dma[32] __aligned(128) = { + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0 +}; + +static __dma_data u32 __ctl_linkage_stack_dma[8] __aligned(64) = { + 0, 0, 0x89000000, 0, + 0, 0, 0x8a000000, 0 +}; + +static u64 __dma_ref *__ctl_aste = __ctl_aste_dma; +static u32 __dma_ref *__ctl_duald = __ctl_duald_dma; +static u32 __dma_ref *__ctl_linkage_stack = __ctl_linkage_stack_dma; +static u32 __dma_ref *__ctl_duct = __ctl_duct_dma; + int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); struct mem_detect_info __bootdata(mem_detect); struct initrd_data __bootdata(initrd_data); -struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); -struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); -unsigned long __bootdata_preserved(__stext_dma); -unsigned long __bootdata_preserved(__etext_dma); -unsigned long __bootdata_preserved(__sdma); -unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); @@ -753,7 +800,6 @@ static void __init reserve_kernel(void) memblock_reserve(0, HEAD_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); - memblock_reserve(__sdma, __edma - __sdma); } static void __init setup_memory(void) @@ -773,6 +819,53 @@ static void __init setup_memory(void) memblock_enforce_memory_limit(memblock_end_of_DRAM()); } +static void __init relocate_dma_section(void) +{ + unsigned long dma_addr, dma_size; + long dma_offset; + long *ptr; + + /* Allocate a new DMA capable memory region */ + dma_size = __edma - __sdma; + pr_info("Relocating DMA section of size 0x%08lx\n", dma_size); + dma_addr = (unsigned long)memblock_alloc_low(dma_size, PAGE_SIZE); + if (!dma_addr) + panic("Failed to allocate memory for DMA section\n"); + dma_offset = dma_addr - __sdma; + + /* Move original DMA section to the new one */ + memmove((void *)dma_addr, (void *)__sdma, dma_size); + /* Zero out the old DMA section to catch invalid accesses within it */ + memset((void *)__sdma, 0, dma_size); + + /* Update all DMA region references */ + for (ptr = _start_dma_refs; ptr != _end_dma_refs; ptr++) + *ptr += dma_offset; +} + +/* This must be called after DMA relocation */ +static void __init setup_cr(void) +{ + union ctlreg2 cr2; + union ctlreg5 cr5; + union ctlreg15 cr15; + + __ctl_duct[1] = (unsigned long)__ctl_aste; + __ctl_duct[2] = (unsigned long)__ctl_aste; + __ctl_duct[4] = (unsigned long)__ctl_duald; + + /* Update control registers CR2, CR5 and CR15 */ + __ctl_store(cr2.val, 2, 2); + __ctl_store(cr5.val, 5, 5); + __ctl_store(cr15.val, 15, 15); + cr2.ducto = (unsigned long)__ctl_duct >> 6; + cr5.pasteo = (unsigned long)__ctl_duct >> 6; + cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; + __ctl_load(cr2.val, 2, 2); + __ctl_load(cr5.val, 5, 5); + __ctl_load(cr15.val, 15, 15); +} + /* * Setup hardware capabilities. */ @@ -1061,6 +1154,9 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); + relocate_dma_section(); + setup_cr(); + setup_uv(); setup_memory_end(); setup_memory(); diff --git a/arch/s390/boot/text_dma.S b/arch/s390/kernel/text_dma.S similarity index 100% rename from arch/s390/boot/text_dma.S rename to arch/s390/kernel/text_dma.S diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 4c0e19145cc66..b18288d26ca8c 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -71,6 +71,13 @@ SECTIONS RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) BOOT_DATA_PRESERVED + . = ALIGN(8); + .dma.refs : { + _start_dma_refs = .; + *(.dma.refs) + _end_dma_refs = .; + } + _edata = .; /* End of data section */ /* will be freed after init */ @@ -136,6 +143,32 @@ SECTIONS BOOT_DATA + /* + * .dma section for code, data, ex_table that need to stay below 2 GB, + * even when the kernel is relocated above 2 GB. + */ + . = ALIGN(PAGE_SIZE); + _sdma = .; + .dma.text : { + _stext_dma = .; + *(.dma.text) + *(.dma.text.*_indirect_*) + . = ALIGN(PAGE_SIZE); + _etext_dma = .; + } + . = ALIGN(16); + .dma.ex_table : { + _start_dma_ex_table = .; + KEEP(*(.dma.ex_table)) + _stop_dma_ex_table = .; + } + . = ALIGN(PAGE_SIZE); + .dma.data : { + *(.dma.data) + } + . = ALIGN(PAGE_SIZE); + _edma = .; + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) -- 2.39.5