]> git.baikalelectronics.ru Git - kernel.git/commitdiff
ARM: module: implement support for PC-relative group relocations
authorArd Biesheuvel <ardb@kernel.org>
Wed, 24 Nov 2021 17:56:22 +0000 (18:56 +0100)
committerArd Biesheuvel <ardb@kernel.org>
Mon, 6 Dec 2021 11:49:16 +0000 (12:49 +0100)
Add support for the R_ARM_ALU_PC_Gn_NC and R_ARM_LDR_PC_G2 group
relocations [0] so we can use them in modules. These will be used to
load the current task pointer from a global variable without having to
rely on a literal pool entry to carry the address of this variable,
which may have a significant negative impact on cache utilization for
variables that are used often and in many different places, as each
occurrence will result in a literal pool entry and therefore a line in
the D-cache.

[0] 'ELF for the ARM architecture'
    https://github.com/ARM-software/abi-aa/releases

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
arch/arm/include/asm/elf.h
arch/arm/kernel/module.c

index b8102a6ddf1665fe5e393eda94c5d2ffa1932e34..d68101655b74ef0cca647d2ff6e3c59d55fc4a11 100644 (file)
@@ -61,6 +61,9 @@ typedef struct user_fp elf_fpregset_t;
 #define R_ARM_MOVT_ABS         44
 #define R_ARM_MOVW_PREL_NC     45
 #define R_ARM_MOVT_PREL                46
+#define R_ARM_ALU_PC_G0_NC     57
+#define R_ARM_ALU_PC_G1_NC     59
+#define R_ARM_LDR_PC_G2                63
 
 #define R_ARM_THM_CALL         10
 #define R_ARM_THM_JUMP24       30
index beac45e89ba64b07d20cd9d52b6025e2c86e4e8d..4d33a7acf617e3e3318d9f86a36ef050d910ea18 100644 (file)
@@ -68,6 +68,42 @@ bool module_exit_section(const char *name)
                strstarts(name, ".ARM.exidx.exit");
 }
 
+/*
+ * This implements the partitioning algorithm for group relocations as
+ * documented in the ARM AArch32 ELF psABI (IHI 0044).
+ *
+ * A single PC-relative symbol reference is divided in up to 3 add or subtract
+ * operations, where the final one could be incorporated into a load/store
+ * instruction with immediate offset. E.g.,
+ *
+ *   ADD       Rd, PC, #...            or      ADD     Rd, PC, #...
+ *   ADD       Rd, Rd, #...                    ADD     Rd, Rd, #...
+ *   LDR       Rd, [Rd, #...]                  ADD     Rd, Rd, #...
+ *
+ * The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is
+ * of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of
+ * -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for
+ * any in-kernel symbol reference (unless module PLTs are being used).
+ *
+ * The main advantage of this approach over the typical pattern using a literal
+ * load is that literal loads may miss in the D-cache, and generally lead to
+ * lower cache efficiency for variables that are referenced often from many
+ * different places in the code.
+ */
+static u32 get_group_rem(u32 group, u32 *offset)
+{
+       u32 val = *offset;
+       u32 shift;
+       do {
+               shift = val ? (31 - __fls(val)) & ~1 : 32;
+               *offset = val;
+               if (!val)
+                       break;
+               val &= 0xffffff >> shift;
+       } while (group--);
+       return shift;
+}
+
 int
 apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
               unsigned int relindex, struct module *module)
@@ -82,6 +118,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
                unsigned long loc;
                Elf32_Sym *sym;
                const char *symname;
+               u32 shift, group = 1;
                s32 offset;
                u32 tmp;
 #ifdef CONFIG_THUMB2_KERNEL
@@ -212,6 +249,54 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
                        *(u32 *)loc = __opcode_to_mem_arm(tmp);
                        break;
 
+               case R_ARM_ALU_PC_G0_NC:
+                       group = 0;
+                       fallthrough;
+               case R_ARM_ALU_PC_G1_NC:
+                       tmp = __mem_to_opcode_arm(*(u32 *)loc);
+                       offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7);
+                       if (tmp & BIT(22))
+                               offset = -offset;
+                       offset += sym->st_value - loc;
+                       if (offset < 0) {
+                               offset = -offset;
+                               tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode
+                       } else {
+                               tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode
+                       }
+
+                       shift = get_group_rem(group, &offset);
+                       if (shift < 24) {
+                               offset >>= 24 - shift;
+                               offset |= (shift + 8) << 7;
+                       }
+                       *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
+                       break;
+
+               case R_ARM_LDR_PC_G2:
+                       tmp = __mem_to_opcode_arm(*(u32 *)loc);
+                       offset = tmp & 0xfff;
+                       if (~tmp & BIT(23))             // U bit cleared?
+                               offset = -offset;
+                       offset += sym->st_value - loc;
+                       if (offset < 0) {
+                               offset = -offset;
+                               tmp &= ~BIT(23);        // clear U bit
+                       } else {
+                               tmp |= BIT(23);         // set U bit
+                       }
+                       get_group_rem(2, &offset);
+
+                       if (offset > 0xfff) {
+                               pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
+                                      module->name, relindex, i, symname,
+                                      ELF32_R_TYPE(rel->r_info), loc,
+                                      sym->st_value);
+                               return -ENOEXEC;
+                       }
+                       *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
+                       break;
+
 #ifdef CONFIG_THUMB2_KERNEL
                case R_ARM_THM_CALL:
                case R_ARM_THM_JUMP24: