+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-
-#include <linux/linkage.h>
-
-.macro GET_FRONT_BITS rx y
-#ifdef __cskyLE__
- lsri \rx, \y
-#else
- lsli \rx, \y
-#endif
-.endm
-
-.macro GET_AFTER_BITS rx y
-#ifdef __cskyLE__
- lsli \rx, \y
-#else
- lsri \rx, \y
-#endif
-.endm
-
-/* void *memcpy(void *dest, const void *src, size_t n); */
-ENTRY(memcpy)
- mov r7, r2
- cmplti r4, 4
- bt .L_copy_by_byte
- mov r6, r2
- andi r6, 3
- cmpnei r6, 0
- jbt .L_dest_not_aligned
- mov r6, r3
- andi r6, 3
- cmpnei r6, 0
- jbt .L_dest_aligned_but_src_not_aligned
-.L0:
- cmplti r4, 16
- jbt .L_aligned_and_len_less_16bytes
- subi sp, 8
- stw r8, (sp, 0)
-.L_aligned_and_len_larger_16bytes:
- ldw r1, (r3, 0)
- ldw r5, (r3, 4)
- ldw r8, (r3, 8)
- stw r1, (r7, 0)
- ldw r1, (r3, 12)
- stw r5, (r7, 4)
- stw r8, (r7, 8)
- stw r1, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L_aligned_and_len_larger_16bytes
- ldw r8, (sp, 0)
- addi sp, 8
- cmpnei r4, 0
- jbf .L_return
-
-.L_aligned_and_len_less_16bytes:
- cmplti r4, 4
- bt .L_copy_by_byte
-.L1:
- ldw r1, (r3, 0)
- stw r1, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- jbf .L1
- br .L_copy_by_byte
-
-.L_return:
- rts
-
-.L_copy_by_byte: /* len less than 4 bytes */
- cmpnei r4, 0
- jbf .L_return
-.L4:
- ldb r1, (r3, 0)
- stb r1, (r7, 0)
- addi r3, 1
- addi r7, 1
- decne r4
- jbt .L4
- rts
-
-/*
- * If dest is not aligned, just copying some bytes makes the dest align.
- * Afther that, we judge whether the src is aligned.
- */
-.L_dest_not_aligned:
- mov r5, r3
- rsub r5, r5, r7
- abs r5, r5
- cmplt r5, r4
- bt .L_copy_by_byte
- mov r5, r7
- sub r5, r3
- cmphs r5, r4
- bf .L_copy_by_byte
- mov r5, r6
-.L5:
- ldb r1, (r3, 0) /* makes the dest align. */
- stb r1, (r7, 0)
- addi r5, 1
- subi r4, 1
- addi r3, 1
- addi r7, 1
- cmpnei r5, 4
- jbt .L5
- cmplti r4, 4
- jbt .L_copy_by_byte
- mov r6, r3 /* judge whether the src is aligned. */
- andi r6, 3
- cmpnei r6, 0
- jbf .L0
-
-/* Judge the number of misaligned, 1, 2, 3? */
-.L_dest_aligned_but_src_not_aligned:
- mov r5, r3
- rsub r5, r5, r7
- abs r5, r5
- cmplt r5, r4
- bt .L_copy_by_byte
- bclri r3, 0
- bclri r3, 1
- ldw r1, (r3, 0)
- addi r3, 4
- cmpnei r6, 2
- bf .L_dest_aligned_but_src_not_aligned_2bytes
- cmpnei r6, 3
- bf .L_dest_aligned_but_src_not_aligned_3bytes
-
-.L_dest_aligned_but_src_not_aligned_1byte:
- mov r5, r7
- sub r5, r3
- cmphs r5, r4
- bf .L_copy_by_byte
- cmplti r4, 16
- bf .L11
-.L10: /* If the len is less than 16 bytes */
- GET_FRONT_BITS r1 8
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 24
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L10
- subi r3, 3
- br .L_copy_by_byte
-.L11:
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-.L12:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 8 /* little or big endian? */
- mov r10, r5
- GET_AFTER_BITS r5 24
- or r5, r1
-
- GET_FRONT_BITS r10 8
- mov r1, r11
- GET_AFTER_BITS r11 24
- or r11, r10
-
- GET_FRONT_BITS r1 8
- mov r10, r8
- GET_AFTER_BITS r8 24
- or r8, r1
-
- GET_FRONT_BITS r10 8
- mov r1, r9
- GET_AFTER_BITS r9 24
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L12
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp , 16
- cmplti r4, 4
- bf .L10
- subi r3, 3
- br .L_copy_by_byte
-
-.L_dest_aligned_but_src_not_aligned_2bytes:
- cmplti r4, 16
- bf .L21
-.L20:
- GET_FRONT_BITS r1 16
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 16
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L20
- subi r3, 2
- br .L_copy_by_byte
- rts
-
-.L21: /* n > 16 */
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-
-.L22:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 16
- mov r10, r5
- GET_AFTER_BITS r5 16
- or r5, r1
-
- GET_FRONT_BITS r10 16
- mov r1, r11
- GET_AFTER_BITS r11 16
- or r11, r10
-
- GET_FRONT_BITS r1 16
- mov r10, r8
- GET_AFTER_BITS r8 16
- or r8, r1
-
- GET_FRONT_BITS r10 16
- mov r1, r9
- GET_AFTER_BITS r9 16
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L22
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp, 16
- cmplti r4, 4
- bf .L20
- subi r3, 2
- br .L_copy_by_byte
-
-
-.L_dest_aligned_but_src_not_aligned_3bytes:
- cmplti r4, 16
- bf .L31
-.L30:
- GET_FRONT_BITS r1 24
- mov r5, r1
- ldw r6, (r3, 0)
- mov r1, r6
- GET_AFTER_BITS r6 8
- or r5, r6
- stw r5, (r7, 0)
- subi r4, 4
- addi r3, 4
- addi r7, 4
- cmplti r4, 4
- bf .L30
- subi r3, 1
- br .L_copy_by_byte
-.L31:
- subi sp, 16
- stw r8, (sp, 0)
- stw r9, (sp, 4)
- stw r10, (sp, 8)
- stw r11, (sp, 12)
-.L32:
- ldw r5, (r3, 0)
- ldw r11, (r3, 4)
- ldw r8, (r3, 8)
- ldw r9, (r3, 12)
-
- GET_FRONT_BITS r1 24
- mov r10, r5
- GET_AFTER_BITS r5 8
- or r5, r1
-
- GET_FRONT_BITS r10 24
- mov r1, r11
- GET_AFTER_BITS r11 8
- or r11, r10
-
- GET_FRONT_BITS r1 24
- mov r10, r8
- GET_AFTER_BITS r8 8
- or r8, r1
-
- GET_FRONT_BITS r10 24
- mov r1, r9
- GET_AFTER_BITS r9 8
- or r9, r10
-
- stw r5, (r7, 0)
- stw r11, (r7, 4)
- stw r8, (r7, 8)
- stw r9, (r7, 12)
- subi r4, 16
- addi r3, 16
- addi r7, 16
- cmplti r4, 16
- jbf .L32
- ldw r8, (sp, 0)
- ldw r9, (sp, 4)
- ldw r10, (sp, 8)
- ldw r11, (sp, 12)
- addi sp, 16
- cmplti r4, 4
- bf .L30
- subi r3, 1
- br .L_copy_by_byte
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * String functions optimized for hardware which doesn't
+ * handle unaligned memory accesses efficiently.
+ *
+ * Copyright (C) 2021 Matteo Croce
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+/* Size in bytes of the machine word (long) used for the wide accesses */
+#define BYTES_LONG sizeof(long)
+/* Address bits that are non-zero when a pointer is not word aligned */
+#define WORD_MASK (BYTES_LONG - 1)
+/*
+ * Minimum size for a word copy to be convenient: shorter requests are
+ * handled one byte at a time.
+ */
+#define MIN_THRESHOLD (BYTES_LONG * 2)
+
+/*
+ * Convenience union to avoid casts between different pointer types: the
+ * same address can be used as a byte pointer, as a word pointer, or as an
+ * integer (to test its alignment bits).
+ */
+union types {
+ /* byte-wise view, used for unaligned heads and tails */
+ u8 *as_u8;
+ /* word-wise view, used once the address is word aligned */
+ unsigned long *as_ulong;
+ /* integer view of the address, used for alignment checks */
+ uintptr_t as_uptr;
+};
+
+/*
+ * Read-only flavour of the pointer union, used for the source buffer of a
+ * copy. Both pointer views are const-qualified so the source can never be
+ * written through this union, whichever member is used.
+ */
+union const_types {
+	/* byte-wise view of the source */
+	const u8 *as_u8;
+	/* word-wise view of the source (read only, like the byte view) */
+	const unsigned long *as_ulong;
+	/* integer view of the address, used for alignment checks */
+	uintptr_t as_uptr;
+};
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ union const_types s = { .as_u8 = src };
+ union types d = { .as_u8 = dest };
+ int distance = 0;
+
+ if (count < MIN_THRESHOLD)
+ goto copy_remainder;
+
+ /* Copy a byte at time until destination is aligned. */
+ for (; d.as_uptr & WORD_MASK; count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ distance = s.as_uptr & WORD_MASK;
+
+ if (distance) {
+ unsigned long last, next;
+
+ /*
+ * s is distance bytes ahead of d, and d just reached
+ * the alignment boundary. Move s backward to word align it
+ * and shift data to compensate for distance, in order to do
+ * word-by-word copy.
+ */
+ s.as_u8 -= distance;
+
+ next = s.as_ulong[0];
+ for (; count >= BYTES_LONG; count -= BYTES_LONG) {
+ last = next;
+ next = s.as_ulong[1];
+
+ d.as_ulong[0] = last >> (distance * 8) |
+ next << ((BYTES_LONG - distance) * 8);
+
+ d.as_ulong++;
+ s.as_ulong++;
+ }
+
+ /* Restore s with the original offset. */
+ s.as_u8 += distance;
+ } else {
+ /*
+ * If the source and dest lower bits are the same, do a simple
+ * 32/64 bit wide copy.
+ */
+ for (; count >= BYTES_LONG; count -= BYTES_LONG)
+ *d.as_ulong++ = *s.as_ulong++;
+ }
+
+copy_remainder:
+ while (count--)
+ *d.as_u8++ = *s.as_u8++;
+
+ return dest;
+}
+EXPORT_SYMBOL(memcpy);
+
+/*
+ * memmove - copy count bytes from src to dest, buffers may overlap
+ *
+ * When dest starts below src, or the two buffers do not overlap at all,
+ * the work is handed over to memcpy(). Otherwise dest overlaps the tail of
+ * src and the bytes are copied one at a time, from the last one backward.
+ *
+ * Returns dest.
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+	const char *from = src;
+	char *to = dest;
+
+	if (to < from || from + count <= to)
+		return memcpy(dest, src, count);
+
+	/* Same address: there is nothing to move. */
+	if (to == from)
+		return dest;
+
+	/* Start one past the end of both buffers and walk backward. */
+	from += count;
+	to += count;
+	for (; count; count--)
+		*--to = *--from;
+
+	return dest;
+}
+EXPORT_SYMBOL(memmove);
+
+/*
+ * memset - fill count bytes starting at s with the byte value c
+ * @s:     buffer to fill
+ * @c:     fill value; only its low 8 bits are used
+ * @count: number of bytes to fill
+ *
+ * Requests shorter than MIN_THRESHOLD are filled byte by byte. Otherwise
+ * the buffer is brought to a word boundary with byte stores, the bulk is
+ * written one word at a time and the tail is finished with byte stores.
+ *
+ * Returns s.
+ */
+void *memset(void *s, int c, size_t count)
+{
+	union types dest = { .as_u8 = s };
+
+	if (count >= MIN_THRESHOLD) {
+		/*
+		 * Keep only the low byte of 'c': any higher bit (e.g. from a
+		 * negative value) would otherwise leak into the neighbouring
+		 * bytes of the pattern and differ from the byte stores below.
+		 */
+		unsigned long cu = (u8)c;
+
+		/* Compose an ulong with 'c' repeated 4/8 times */
+		cu |= cu << 8;
+		cu |= cu << 16;
+		/* Suppress warning on 32 bit machines */
+		cu |= (cu << 16) << 16;
+
+		/*
+		 * Align the destination. At most BYTES_LONG - 1 bytes are
+		 * written here, so count cannot reach zero.
+		 */
+		for (; dest.as_uptr & WORD_MASK; count--)
+			*dest.as_u8++ = c;
+
+		/* Copy using the largest size allowed */
+		for (; count >= BYTES_LONG; count -= BYTES_LONG)
+			*dest.as_ulong++ = cu;
+	}
+
+	/* copy the remainder */
+	while (count--)
+		*dest.as_u8++ = c;
+
+	return s;
+}
+EXPORT_SYMBOL(memset);