aboutsummaryrefslogtreecommitdiff
path: root/arch/loongarch/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/loongarch/lib')
-rw-r--r--arch/loongarch/lib/Makefile7
-rw-r--r--arch/loongarch/lib/clear_user.S98
-rw-r--r--arch/loongarch/lib/copy_user.S123
-rw-r--r--arch/loongarch/lib/delay.c42
-rw-r--r--arch/loongarch/lib/dump_tlb.c111
-rw-r--r--arch/loongarch/lib/memcpy.S95
-rw-r--r--arch/loongarch/lib/memmove.S121
-rw-r--r--arch/loongarch/lib/memset.S91
-rw-r--r--arch/loongarch/lib/unaligned.S84
9 files changed, 772 insertions, 0 deletions
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
new file mode 100644
index 000000000..40bde6329
--- /dev/null
+++ b/arch/loongarch/lib/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for LoongArch-specific library files.
+#
+
+lib-y += delay.o memset.o memcpy.o memmove.o \
+ clear_user.o copy_user.o dump_tlb.o unaligned.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
new file mode 100644
index 000000000..2dc48e61a
--- /dev/null
+++ b/arch/loongarch/lib/clear_user.S
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.irp to, 0, 1, 2, 3, 4, 5, 6, 7
+.L_fixup_handle_\to\():
+ addi.d a0, a1, (\to) * (-8)
+ jr ra
+.endr
+
+SYM_FUNC_START(__clear_user)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __clear_user_generic", \
+ "b __clear_user_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(__clear_user)
+
+EXPORT_SYMBOL(__clear_user)
+
+/*
+ * unsigned long __clear_user_generic(void *addr, size_t size)
+ *
+ * a0: addr
+ * a1: size
+ */
+SYM_FUNC_START(__clear_user_generic)
+ beqz a1, 2f
+
+1: st.b zero, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, -1
+ bgtz a1, 1b
+
+2: move a0, a1
+ jr ra
+
+ _asm_extable 1b, .L_fixup_handle_0
+SYM_FUNC_END(__clear_user_generic)
+
+/*
+ * unsigned long __clear_user_fast(void *addr, unsigned long size)
+ *
+ * a0: addr
+ * a1: size
+ */
+SYM_FUNC_START(__clear_user_fast)
+ beqz a1, 10f
+
+ ori a2, zero, 64
+ blt a1, a2, 9f
+
+ /* set 64 bytes at a time */
+1: st.d zero, a0, 0
+2: st.d zero, a0, 8
+3: st.d zero, a0, 16
+4: st.d zero, a0, 24
+5: st.d zero, a0, 32
+6: st.d zero, a0, 40
+7: st.d zero, a0, 48
+8: st.d zero, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a1, a1, -64
+ bge a1, a2, 1b
+
+ beqz a1, 10f
+
+ /* set the remaining bytes */
+9: st.b zero, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, -1
+ bgt a1, zero, 9b
+
+ /* return */
+10: move a0, a1
+ jr ra
+
+ /* fixup and ex_table */
+ _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 2b, .L_fixup_handle_1
+ _asm_extable 3b, .L_fixup_handle_2
+ _asm_extable 4b, .L_fixup_handle_3
+ _asm_extable 5b, .L_fixup_handle_4
+ _asm_extable 6b, .L_fixup_handle_5
+ _asm_extable 7b, .L_fixup_handle_6
+ _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 9b, .L_fixup_handle_0
+SYM_FUNC_END(__clear_user_fast)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
new file mode 100644
index 000000000..55ac6020a
--- /dev/null
+++ b/arch/loongarch/lib/copy_user.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.irp to, 0, 1, 2, 3, 4, 5, 6, 7
+.L_fixup_handle_\to\():
+ addi.d a0, a2, (\to) * (-8)
+ jr ra
+.endr
+
+SYM_FUNC_START(__copy_user)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __copy_user_generic", \
+ "b __copy_user_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(__copy_user)
+
+EXPORT_SYMBOL(__copy_user)
+
+/*
+ * unsigned long __copy_user_generic(void *to, const void *from, size_t n)
+ *
+ * a0: to
+ * a1: from
+ * a2: n
+ */
+SYM_FUNC_START(__copy_user_generic)
+ beqz a2, 3f
+
+1: ld.b t0, a1, 0
+2: st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgtz a2, 1b
+
+3: move a0, a2
+ jr ra
+
+ _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 2b, .L_fixup_handle_0
+SYM_FUNC_END(__copy_user_generic)
+
+/*
+ * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n)
+ *
+ * a0: to
+ * a1: from
+ * a2: n
+ */
+SYM_FUNC_START(__copy_user_fast)
+ beqz a2, 19f
+
+ ori a3, zero, 64
+ blt a2, a3, 17f
+
+ /* copy 64 bytes at a time */
+1: ld.d t0, a1, 0
+2: ld.d t1, a1, 8
+3: ld.d t2, a1, 16
+4: ld.d t3, a1, 24
+5: ld.d t4, a1, 32
+6: ld.d t5, a1, 40
+7: ld.d t6, a1, 48
+8: ld.d t7, a1, 56
+9: st.d t0, a0, 0
+10: st.d t1, a0, 8
+11: st.d t2, a0, 16
+12: st.d t3, a0, 24
+13: st.d t4, a0, 32
+14: st.d t5, a0, 40
+15: st.d t6, a0, 48
+16: st.d t7, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a1, a1, 64
+ addi.d a2, a2, -64
+ bge a2, a3, 1b
+
+ beqz a2, 19f
+
+ /* copy the remaining bytes */
+17: ld.b t0, a1, 0
+18: st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 17b
+
+ /* return */
+19: move a0, a2
+ jr ra
+
+ /* fixup and ex_table */
+ _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 2b, .L_fixup_handle_1
+ _asm_extable 3b, .L_fixup_handle_2
+ _asm_extable 4b, .L_fixup_handle_3
+ _asm_extable 5b, .L_fixup_handle_4
+ _asm_extable 6b, .L_fixup_handle_5
+ _asm_extable 7b, .L_fixup_handle_6
+ _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 9b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_1
+ _asm_extable 11b, .L_fixup_handle_2
+ _asm_extable 12b, .L_fixup_handle_3
+ _asm_extable 13b, .L_fixup_handle_4
+ _asm_extable 14b, .L_fixup_handle_5
+ _asm_extable 15b, .L_fixup_handle_6
+ _asm_extable 16b, .L_fixup_handle_7
+ _asm_extable 17b, .L_fixup_handle_0
+ _asm_extable 18b, .L_fixup_handle_0
+SYM_FUNC_END(__copy_user_fast)
diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c
new file mode 100644
index 000000000..831d4761f
--- /dev/null
+++ b/arch/loongarch/lib/delay.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+
+#include <asm/processor.h>
+
+void __delay(unsigned long cycles)
+{
+ u64 t0 = get_cycles();
+
+ while ((unsigned long)(get_cycles() - t0) < cycles)
+ cpu_relax();
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Division by multiplication: you don't have to worry about
+ * loss of precision.
+ *
+ * Use only for very small delays ( < 1 msec). Should probably use a
+ * lookup table, really, as the multiplications take much too long with
+ * short delays. This is a "reasonable" implementation, though (and the
+ * first constant multiplications gets optimized away if the delay is
+ * a constant)
+ */
+
+void __udelay(unsigned long us)
+{
+ __delay((us * 0x000010c7ull * HZ * lpj_fine) >> 32);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long ns)
+{
+ __delay((ns * 0x00000005ull * HZ * lpj_fine) >> 32);
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c
new file mode 100644
index 000000000..c2cc7ce34
--- /dev/null
+++ b/arch/loongarch/lib/dump_tlb.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 1994, 1995 by Waldorf Electronics, written by Ralf Baechle.
+ * Copyright (C) 1999 by Silicon Graphics, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/loongarch.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+void dump_tlb_regs(void)
+{
+ const int field = 2 * sizeof(unsigned long);
+
+ pr_info("Index : 0x%0x\n", read_csr_tlbidx());
+ pr_info("PageSize : 0x%0x\n", read_csr_pagesize());
+ pr_info("EntryHi : 0x%0*llx\n", field, read_csr_entryhi());
+ pr_info("EntryLo0 : 0x%0*llx\n", field, read_csr_entrylo0());
+ pr_info("EntryLo1 : 0x%0*llx\n", field, read_csr_entrylo1());
+}
+
+static void dump_tlb(int first, int last)
+{
+ unsigned long s_entryhi, entryhi, asid;
+ unsigned long long entrylo0, entrylo1, pa;
+ unsigned int index;
+ unsigned int s_index, s_asid;
+ unsigned int pagesize, c0, c1, i;
+ unsigned long asidmask = cpu_asid_mask(&current_cpu_data);
+ int pwidth = 16;
+ int vwidth = 16;
+ int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4);
+
+ s_entryhi = read_csr_entryhi();
+ s_index = read_csr_tlbidx();
+ s_asid = read_csr_asid();
+
+ for (i = first; i <= last; i++) {
+ write_csr_index(i);
+ tlb_read();
+ pagesize = read_csr_pagesize();
+ entryhi = read_csr_entryhi();
+ entrylo0 = read_csr_entrylo0();
+ entrylo1 = read_csr_entrylo1();
+ index = read_csr_tlbidx();
+ asid = read_csr_asid();
+
+ /* EHINV bit marks entire entry as invalid */
+ if (index & CSR_TLBIDX_EHINV)
+ continue;
+ /*
+ * ASID takes effect in absence of G (global) bit.
+ */
+ if (!((entrylo0 | entrylo1) & ENTRYLO_G) &&
+ asid != s_asid)
+ continue;
+
+ /*
+ * Only print entries in use
+ */
+ pr_info("Index: %4d pgsize=0x%x ", i, (1 << pagesize));
+
+ c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
+ c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
+
+ pr_cont("va=0x%0*lx asid=0x%0*lx",
+ vwidth, (entryhi & ~0x1fffUL), asidwidth, asid & asidmask);
+
+ /* NR/NX are in awkward places, so mask them off separately */
+ pa = entrylo0 & ~(ENTRYLO_NR | ENTRYLO_NX);
+ pa = pa & PAGE_MASK;
+ pr_cont("\n\t[");
+ pr_cont("nr=%d nx=%d ",
+ (entrylo0 & ENTRYLO_NR) ? 1 : 0,
+ (entrylo0 & ENTRYLO_NX) ? 1 : 0);
+ pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld] [",
+ pwidth, pa, c0,
+ (entrylo0 & ENTRYLO_D) ? 1 : 0,
+ (entrylo0 & ENTRYLO_V) ? 1 : 0,
+ (entrylo0 & ENTRYLO_G) ? 1 : 0,
+ (entrylo0 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT);
+ /* NR/NX are in awkward places, so mask them off separately */
+ pa = entrylo1 & ~(ENTRYLO_NR | ENTRYLO_NX);
+ pa = pa & PAGE_MASK;
+ pr_cont("nr=%d nx=%d ",
+ (entrylo1 & ENTRYLO_NR) ? 1 : 0,
+ (entrylo1 & ENTRYLO_NX) ? 1 : 0);
+ pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n",
+ pwidth, pa, c1,
+ (entrylo1 & ENTRYLO_D) ? 1 : 0,
+ (entrylo1 & ENTRYLO_V) ? 1 : 0,
+ (entrylo1 & ENTRYLO_G) ? 1 : 0,
+ (entrylo1 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT);
+ }
+ pr_info("\n");
+
+ write_csr_entryhi(s_entryhi);
+ write_csr_tlbidx(s_index);
+ write_csr_asid(s_asid);
+}
+
+void dump_tlb_all(void)
+{
+ dump_tlb(0, current_cpu_data.tlbsize - 1);
+}
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
new file mode 100644
index 000000000..7c07d595e
--- /dev/null
+++ b/arch/loongarch/lib/memcpy.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+SYM_FUNC_START(memcpy)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __memcpy_generic", \
+ "b __memcpy_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(memcpy)
+
+EXPORT_SYMBOL(memcpy)
+
+/*
+ * void *__memcpy_generic(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__memcpy_generic)
+ move a3, a0
+ beqz a2, 2f
+
+1: ld.b t0, a1, 0
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(__memcpy_generic)
+
+/*
+ * void *__memcpy_fast(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__memcpy_fast)
+ move a3, a0
+ beqz a2, 3f
+
+ ori a4, zero, 64
+ blt a2, a4, 2f
+
+ /* copy 64 bytes at a time */
+1: ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ ld.d t4, a1, 32
+ ld.d t5, a1, 40
+ ld.d t6, a1, 48
+ ld.d t7, a1, 56
+ st.d t0, a0, 0
+ st.d t1, a0, 8
+ st.d t2, a0, 16
+ st.d t3, a0, 24
+ st.d t4, a0, 32
+ st.d t5, a0, 40
+ st.d t6, a0, 48
+ st.d t7, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a1, a1, 64
+ addi.d a2, a2, -64
+ bge a2, a4, 1b
+
+ beqz a2, 3f
+
+ /* copy the remaining bytes */
+2: ld.b t0, a1, 0
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+
+ /* return */
+3: move a0, a3
+ jr ra
+SYM_FUNC_END(__memcpy_fast)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
new file mode 100644
index 000000000..6ffdb46da
--- /dev/null
+++ b/arch/loongarch/lib/memmove.S
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+SYM_FUNC_START(memmove)
+ blt a0, a1, 1f /* dst < src, memcpy */
+ blt a1, a0, 3f /* src < dst, rmemcpy */
+ jr ra /* dst == src, return */
+
+ /* if (src - dst) < 64, copy 1 byte at a time */
+1: ori a3, zero, 64
+ sub.d t0, a1, a0
+ blt t0, a3, 2f
+ b memcpy
+2: b __memcpy_generic
+
+ /* if (dst - src) < 64, copy 1 byte at a time */
+3: ori a3, zero, 64
+ sub.d t0, a0, a1
+ blt t0, a3, 4f
+ b rmemcpy
+4: b __rmemcpy_generic
+SYM_FUNC_END(memmove)
+
+EXPORT_SYMBOL(memmove)
+
+SYM_FUNC_START(rmemcpy)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __rmemcpy_generic", \
+ "b __rmemcpy_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(rmemcpy)
+
+/*
+ * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__rmemcpy_generic)
+ move a3, a0
+ beqz a2, 2f
+
+ add.d a0, a0, a2
+ add.d a1, a1, a2
+
+1: ld.b t0, a1, -1
+ st.b t0, a0, -1
+ addi.d a0, a0, -1
+ addi.d a1, a1, -1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(__rmemcpy_generic)
+
+/*
+ * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__rmemcpy_fast)
+ move a3, a0
+ beqz a2, 3f
+
+ add.d a0, a0, a2
+ add.d a1, a1, a2
+
+ ori a4, zero, 64
+ blt a2, a4, 2f
+
+ /* copy 64 bytes at a time */
+1: ld.d t0, a1, -8
+ ld.d t1, a1, -16
+ ld.d t2, a1, -24
+ ld.d t3, a1, -32
+ ld.d t4, a1, -40
+ ld.d t5, a1, -48
+ ld.d t6, a1, -56
+ ld.d t7, a1, -64
+ st.d t0, a0, -8
+ st.d t1, a0, -16
+ st.d t2, a0, -24
+ st.d t3, a0, -32
+ st.d t4, a0, -40
+ st.d t5, a0, -48
+ st.d t6, a0, -56
+ st.d t7, a0, -64
+
+ addi.d a0, a0, -64
+ addi.d a1, a1, -64
+ addi.d a2, a2, -64
+ bge a2, a4, 1b
+
+ beqz a2, 3f
+
+ /* copy the remaining bytes */
+2: ld.b t0, a1, -1
+ st.b t0, a0, -1
+ addi.d a0, a0, -1
+ addi.d a1, a1, -1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+
+ /* return */
+3: move a0, a3
+ jr ra
+SYM_FUNC_END(__rmemcpy_fast)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
new file mode 100644
index 000000000..e7cb4ea37
--- /dev/null
+++ b/arch/loongarch/lib/memset.S
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.macro fill_to_64 r0
+ bstrins.d \r0, \r0, 15, 8
+ bstrins.d \r0, \r0, 31, 16
+ bstrins.d \r0, \r0, 63, 32
+.endm
+
+SYM_FUNC_START(memset)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __memset_generic", \
+ "b __memset_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(memset)
+
+EXPORT_SYMBOL(memset)
+
+/*
+ * void *__memset_generic(void *s, int c, size_t n)
+ *
+ * a0: s
+ * a1: c
+ * a2: n
+ */
+SYM_FUNC_START(__memset_generic)
+ move a3, a0
+ beqz a2, 2f
+
+1: st.b a1, a0, 0
+ addi.d a0, a0, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(__memset_generic)
+
+/*
+ * void *__memset_fast(void *s, int c, size_t n)
+ *
+ * a0: s
+ * a1: c
+ * a2: n
+ */
+SYM_FUNC_START(__memset_fast)
+ move a3, a0
+ beqz a2, 3f
+
+ ori a4, zero, 64
+ blt a2, a4, 2f
+
+ /* fill a1 to 64 bits */
+ fill_to_64 a1
+
+ /* set 64 bytes at a time */
+1: st.d a1, a0, 0
+ st.d a1, a0, 8
+ st.d a1, a0, 16
+ st.d a1, a0, 24
+ st.d a1, a0, 32
+ st.d a1, a0, 40
+ st.d a1, a0, 48
+ st.d a1, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a2, a2, -64
+ bge a2, a4, 1b
+
+ beqz a2, 3f
+
+ /* set the remaining bytes */
+2: st.b a1, a0, 0
+ addi.d a0, a0, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+
+ /* return */
+3: move a0, a3
+ jr ra
+SYM_FUNC_END(__memset_fast)
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
new file mode 100644
index 000000000..9177fd638
--- /dev/null
+++ b/arch/loongarch/lib/unaligned.S
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.L_fixup_handle_unaligned:
+ li.w a0, -EFAULT
+ jr ra
+
+/*
+ * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ * a3: sign
+ */
+SYM_FUNC_START(unaligned_read)
+ beqz a2, 5f
+
+ li.w t2, 0
+ addi.d t0, a2, -1
+ slli.d t1, t0, 3
+ add.d a0, a0, t0
+
+ beqz a3, 2f
+1: ld.b t3, a0, 0
+ b 3f
+
+2: ld.bu t3, a0, 0
+3: sll.d t3, t3, t1
+ or t2, t2, t3
+ addi.d t1, t1, -8
+ addi.d a0, a0, -1
+ addi.d a2, a2, -1
+ bgtz a2, 2b
+4: st.d t2, a1, 0
+
+ move a0, a2
+ jr ra
+
+5: li.w a0, -EFAULT
+ jr ra
+
+ _asm_extable 1b, .L_fixup_handle_unaligned
+ _asm_extable 2b, .L_fixup_handle_unaligned
+ _asm_extable 4b, .L_fixup_handle_unaligned
+SYM_FUNC_END(unaligned_read)
+
+/*
+ * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ */
+SYM_FUNC_START(unaligned_write)
+ beqz a2, 3f
+
+ li.w t0, 0
+1: srl.d t1, a1, t0
+2: st.b t1, a0, 0
+ addi.d t0, t0, 8
+ addi.d a2, a2, -1
+ addi.d a0, a0, 1
+ bgtz a2, 1b
+
+ move a0, a2
+ jr ra
+
+3: li.w a0, -EFAULT
+ jr ra
+
+ _asm_extable 2b, .L_fixup_handle_unaligned
+SYM_FUNC_END(unaligned_write)