aboutsummaryrefslogtreecommitdiff
path: root/arch/loongarch/lib
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /arch/loongarch/lib
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'arch/loongarch/lib')
-rw-r--r--arch/loongarch/lib/Makefile7
-rw-r--r--arch/loongarch/lib/clear_user.S98
-rw-r--r--arch/loongarch/lib/copy_user.S123
-rw-r--r--arch/loongarch/lib/delay.c42
-rw-r--r--arch/loongarch/lib/dump_tlb.c111
-rw-r--r--arch/loongarch/lib/memcpy.S95
-rw-r--r--arch/loongarch/lib/memmove.S121
-rw-r--r--arch/loongarch/lib/memset.S91
-rw-r--r--arch/loongarch/lib/unaligned.S84
9 files changed, 772 insertions, 0 deletions
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
new file mode 100644
index 000000000..40bde6329
--- /dev/null
+++ b/arch/loongarch/lib/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for LoongArch-specific library files.
+#
+
+lib-y += delay.o memset.o memcpy.o memmove.o \
+ clear_user.o copy_user.o dump_tlb.o unaligned.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
new file mode 100644
index 000000000..2dc48e61a
--- /dev/null
+++ b/arch/loongarch/lib/clear_user.S
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.irp to, 0, 1, 2, 3, 4, 5, 6, 7
+.L_fixup_handle_\to\():
+ addi.d a0, a1, (\to) * (-8)
+ jr ra
+.endr
+
+SYM_FUNC_START(__clear_user)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __clear_user_generic", \
+ "b __clear_user_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(__clear_user)
+
+EXPORT_SYMBOL(__clear_user)
+
+/*
+ * unsigned long __clear_user_generic(void *addr, size_t size)
+ *
+ * a0: addr
+ * a1: size
+ */
+SYM_FUNC_START(__clear_user_generic)
+ beqz a1, 2f
+
+1: st.b zero, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, -1
+ bgtz a1, 1b
+
+2: move a0, a1
+ jr ra
+
+ _asm_extable 1b, .L_fixup_handle_0
+SYM_FUNC_END(__clear_user_generic)
+
+/*
+ * unsigned long __clear_user_fast(void *addr, unsigned long size)
+ *
+ * a0: addr
+ * a1: size
+ */
+SYM_FUNC_START(__clear_user_fast)
+ beqz a1, 10f
+
+ ori a2, zero, 64
+ blt a1, a2, 9f
+
+ /* set 64 bytes at a time */
+1: st.d zero, a0, 0
+2: st.d zero, a0, 8
+3: st.d zero, a0, 16
+4: st.d zero, a0, 24
+5: st.d zero, a0, 32
+6: st.d zero, a0, 40
+7: st.d zero, a0, 48
+8: st.d zero, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a1, a1, -64
+ bge a1, a2, 1b
+
+ beqz a1, 10f
+
+ /* set the remaining bytes */
+9: st.b zero, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, -1
+ bgt a1, zero, 9b
+
+ /* return */
+10: move a0, a1
+ jr ra
+
+ /* fixup and ex_table */
+ _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 2b, .L_fixup_handle_1
+ _asm_extable 3b, .L_fixup_handle_2
+ _asm_extable 4b, .L_fixup_handle_3
+ _asm_extable 5b, .L_fixup_handle_4
+ _asm_extable 6b, .L_fixup_handle_5
+ _asm_extable 7b, .L_fixup_handle_6
+ _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 9b, .L_fixup_handle_0
+SYM_FUNC_END(__clear_user_fast)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
new file mode 100644
index 000000000..55ac6020a
--- /dev/null
+++ b/arch/loongarch/lib/copy_user.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.irp to, 0, 1, 2, 3, 4, 5, 6, 7
+.L_fixup_handle_\to\():
+ addi.d a0, a2, (\to) * (-8)
+ jr ra
+.endr
+
+SYM_FUNC_START(__copy_user)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __copy_user_generic", \
+ "b __copy_user_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(__copy_user)
+
+EXPORT_SYMBOL(__copy_user)
+
+/*
+ * unsigned long __copy_user_generic(void *to, const void *from, size_t n)
+ *
+ * a0: to
+ * a1: from
+ * a2: n
+ */
+SYM_FUNC_START(__copy_user_generic)
+ beqz a2, 3f
+
+1: ld.b t0, a1, 0
+2: st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgtz a2, 1b
+
+3: move a0, a2
+ jr ra
+
+ _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 2b, .L_fixup_handle_0
+SYM_FUNC_END(__copy_user_generic)
+
+/*
+ * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n)
+ *
+ * a0: to
+ * a1: from
+ * a2: n
+ */
+SYM_FUNC_START(__copy_user_fast)
+ beqz a2, 19f
+
+ ori a3, zero, 64
+ blt a2, a3, 17f
+
+ /* copy 64 bytes at a time */
+1: ld.d t0, a1, 0
+2: ld.d t1, a1, 8
+3: ld.d t2, a1, 16
+4: ld.d t3, a1, 24
+5: ld.d t4, a1, 32
+6: ld.d t5, a1, 40
+7: ld.d t6, a1, 48
+8: ld.d t7, a1, 56
+9: st.d t0, a0, 0
+10: st.d t1, a0, 8
+11: st.d t2, a0, 16
+12: st.d t3, a0, 24
+13: st.d t4, a0, 32
+14: st.d t5, a0, 40
+15: st.d t6, a0, 48
+16: st.d t7, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a1, a1, 64
+ addi.d a2, a2, -64
+ bge a2, a3, 1b
+
+ beqz a2, 19f
+
+ /* copy the remaining bytes */
+17: ld.b t0, a1, 0
+18: st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 17b
+
+ /* return */
+19: move a0, a2
+ jr ra
+
+ /* fixup and ex_table */
+ _asm_extable 1b, .L_fixup_handle_0
+ _asm_extable 2b, .L_fixup_handle_1
+ _asm_extable 3b, .L_fixup_handle_2
+ _asm_extable 4b, .L_fixup_handle_3
+ _asm_extable 5b, .L_fixup_handle_4
+ _asm_extable 6b, .L_fixup_handle_5
+ _asm_extable 7b, .L_fixup_handle_6
+ _asm_extable 8b, .L_fixup_handle_7
+ _asm_extable 9b, .L_fixup_handle_0
+ _asm_extable 10b, .L_fixup_handle_1
+ _asm_extable 11b, .L_fixup_handle_2
+ _asm_extable 12b, .L_fixup_handle_3
+ _asm_extable 13b, .L_fixup_handle_4
+ _asm_extable 14b, .L_fixup_handle_5
+ _asm_extable 15b, .L_fixup_handle_6
+ _asm_extable 16b, .L_fixup_handle_7
+ _asm_extable 17b, .L_fixup_handle_0
+ _asm_extable 18b, .L_fixup_handle_0
+SYM_FUNC_END(__copy_user_fast)
diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c
new file mode 100644
index 000000000..831d4761f
--- /dev/null
+++ b/arch/loongarch/lib/delay.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+
+#include <asm/processor.h>
+
+void __delay(unsigned long cycles)
+{
+ u64 t0 = get_cycles();
+
+ while ((unsigned long)(get_cycles() - t0) < cycles)
+ cpu_relax();
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Division by multiplication: you don't have to worry about
+ * loss of precision.
+ *
+ * Use only for very small delays ( < 1 msec). Should probably use a
+ * lookup table, really, as the multiplications take much too long with
+ * short delays. This is a "reasonable" implementation, though (and the
+ * first constant multiplications gets optimized away if the delay is
+ * a constant)
+ */
+
+void __udelay(unsigned long us)
+{
+ __delay((us * 0x000010c7ull * HZ * lpj_fine) >> 32);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long ns)
+{
+ __delay((ns * 0x00000005ull * HZ * lpj_fine) >> 32);
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c
new file mode 100644
index 000000000..c2cc7ce34
--- /dev/null
+++ b/arch/loongarch/lib/dump_tlb.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 1994, 1995 by Waldorf Electronics, written by Ralf Baechle.
+ * Copyright (C) 1999 by Silicon Graphics, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/loongarch.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+void dump_tlb_regs(void)
+{
+ const int field = 2 * sizeof(unsigned long);
+
+ pr_info("Index : 0x%0x\n", read_csr_tlbidx());
+ pr_info("PageSize : 0x%0x\n", read_csr_pagesize());
+ pr_info("EntryHi : 0x%0*llx\n", field, read_csr_entryhi());
+ pr_info("EntryLo0 : 0x%0*llx\n", field, read_csr_entrylo0());
+ pr_info("EntryLo1 : 0x%0*llx\n", field, read_csr_entrylo1());
+}
+
+static void dump_tlb(int first, int last)
+{
+ unsigned long s_entryhi, entryhi, asid;
+ unsigned long long entrylo0, entrylo1, pa;
+ unsigned int index;
+ unsigned int s_index, s_asid;
+ unsigned int pagesize, c0, c1, i;
+ unsigned long asidmask = cpu_asid_mask(&current_cpu_data);
+ int pwidth = 16;
+ int vwidth = 16;
+ int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4);
+
+ s_entryhi = read_csr_entryhi();
+ s_index = read_csr_tlbidx();
+ s_asid = read_csr_asid();
+
+ for (i = first; i <= last; i++) {
+ write_csr_index(i);
+ tlb_read();
+ pagesize = read_csr_pagesize();
+ entryhi = read_csr_entryhi();
+ entrylo0 = read_csr_entrylo0();
+ entrylo1 = read_csr_entrylo1();
+ index = read_csr_tlbidx();
+ asid = read_csr_asid();
+
+ /* EHINV bit marks entire entry as invalid */
+ if (index & CSR_TLBIDX_EHINV)
+ continue;
+ /*
+ * ASID takes effect in absence of G (global) bit.
+ */
+ if (!((entrylo0 | entrylo1) & ENTRYLO_G) &&
+ asid != s_asid)
+ continue;
+
+ /*
+ * Only print entries in use
+ */
+ pr_info("Index: %4d pgsize=0x%x ", i, (1 << pagesize));
+
+ c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
+ c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
+
+ pr_cont("va=0x%0*lx asid=0x%0*lx",
+ vwidth, (entryhi & ~0x1fffUL), asidwidth, asid & asidmask);
+
+ /* NR/NX are in awkward places, so mask them off separately */
+ pa = entrylo0 & ~(ENTRYLO_NR | ENTRYLO_NX);
+ pa = pa & PAGE_MASK;
+ pr_cont("\n\t[");
+ pr_cont("nr=%d nx=%d ",
+ (entrylo0 & ENTRYLO_NR) ? 1 : 0,
+ (entrylo0 & ENTRYLO_NX) ? 1 : 0);
+ pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld] [",
+ pwidth, pa, c0,
+ (entrylo0 & ENTRYLO_D) ? 1 : 0,
+ (entrylo0 & ENTRYLO_V) ? 1 : 0,
+ (entrylo0 & ENTRYLO_G) ? 1 : 0,
+ (entrylo0 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT);
+ /* NR/NX are in awkward places, so mask them off separately */
+ pa = entrylo1 & ~(ENTRYLO_NR | ENTRYLO_NX);
+ pa = pa & PAGE_MASK;
+ pr_cont("nr=%d nx=%d ",
+ (entrylo1 & ENTRYLO_NR) ? 1 : 0,
+ (entrylo1 & ENTRYLO_NX) ? 1 : 0);
+ pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n",
+ pwidth, pa, c1,
+ (entrylo1 & ENTRYLO_D) ? 1 : 0,
+ (entrylo1 & ENTRYLO_V) ? 1 : 0,
+ (entrylo1 & ENTRYLO_G) ? 1 : 0,
+ (entrylo1 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT);
+ }
+ pr_info("\n");
+
+ write_csr_entryhi(s_entryhi);
+ write_csr_tlbidx(s_index);
+ write_csr_asid(s_asid);
+}
+
+void dump_tlb_all(void)
+{
+ dump_tlb(0, current_cpu_data.tlbsize - 1);
+}
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
new file mode 100644
index 000000000..7c07d595e
--- /dev/null
+++ b/arch/loongarch/lib/memcpy.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+SYM_FUNC_START(memcpy)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __memcpy_generic", \
+ "b __memcpy_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(memcpy)
+
+EXPORT_SYMBOL(memcpy)
+
+/*
+ * void *__memcpy_generic(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__memcpy_generic)
+ move a3, a0
+ beqz a2, 2f
+
+1: ld.b t0, a1, 0
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(__memcpy_generic)
+
+/*
+ * void *__memcpy_fast(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__memcpy_fast)
+ move a3, a0
+ beqz a2, 3f
+
+ ori a4, zero, 64
+ blt a2, a4, 2f
+
+ /* copy 64 bytes at a time */
+1: ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ ld.d t4, a1, 32
+ ld.d t5, a1, 40
+ ld.d t6, a1, 48
+ ld.d t7, a1, 56
+ st.d t0, a0, 0
+ st.d t1, a0, 8
+ st.d t2, a0, 16
+ st.d t3, a0, 24
+ st.d t4, a0, 32
+ st.d t5, a0, 40
+ st.d t6, a0, 48
+ st.d t7, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a1, a1, 64
+ addi.d a2, a2, -64
+ bge a2, a4, 1b
+
+ beqz a2, 3f
+
+ /* copy the remaining bytes */
+2: ld.b t0, a1, 0
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+
+ /* return */
+3: move a0, a3
+ jr ra
+SYM_FUNC_END(__memcpy_fast)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
new file mode 100644
index 000000000..6ffdb46da
--- /dev/null
+++ b/arch/loongarch/lib/memmove.S
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+SYM_FUNC_START(memmove)
+ blt a0, a1, 1f /* dst < src, memcpy */
+ blt a1, a0, 3f /* src < dst, rmemcpy */
+ jr ra /* dst == src, return */
+
+ /* if (src - dst) < 64, copy 1 byte at a time */
+1: ori a3, zero, 64
+ sub.d t0, a1, a0
+ blt t0, a3, 2f
+ b memcpy
+2: b __memcpy_generic
+
+ /* if (dst - src) < 64, copy 1 byte at a time */
+3: ori a3, zero, 64
+ sub.d t0, a0, a1
+ blt t0, a3, 4f
+ b rmemcpy
+4: b __rmemcpy_generic
+SYM_FUNC_END(memmove)
+
+EXPORT_SYMBOL(memmove)
+
+SYM_FUNC_START(rmemcpy)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __rmemcpy_generic", \
+ "b __rmemcpy_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(rmemcpy)
+
+/*
+ * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__rmemcpy_generic)
+ move a3, a0
+ beqz a2, 2f
+
+ add.d a0, a0, a2
+ add.d a1, a1, a2
+
+1: ld.b t0, a1, -1
+ st.b t0, a0, -1
+ addi.d a0, a0, -1
+ addi.d a1, a1, -1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(__rmemcpy_generic)
+
+/*
+ * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__rmemcpy_fast)
+ move a3, a0
+ beqz a2, 3f
+
+ add.d a0, a0, a2
+ add.d a1, a1, a2
+
+ ori a4, zero, 64
+ blt a2, a4, 2f
+
+ /* copy 64 bytes at a time */
+1: ld.d t0, a1, -8
+ ld.d t1, a1, -16
+ ld.d t2, a1, -24
+ ld.d t3, a1, -32
+ ld.d t4, a1, -40
+ ld.d t5, a1, -48
+ ld.d t6, a1, -56
+ ld.d t7, a1, -64
+ st.d t0, a0, -8
+ st.d t1, a0, -16
+ st.d t2, a0, -24
+ st.d t3, a0, -32
+ st.d t4, a0, -40
+ st.d t5, a0, -48
+ st.d t6, a0, -56
+ st.d t7, a0, -64
+
+ addi.d a0, a0, -64
+ addi.d a1, a1, -64
+ addi.d a2, a2, -64
+ bge a2, a4, 1b
+
+ beqz a2, 3f
+
+ /* copy the remaining bytes */
+2: ld.b t0, a1, -1
+ st.b t0, a0, -1
+ addi.d a0, a0, -1
+ addi.d a1, a1, -1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+
+ /* return */
+3: move a0, a3
+ jr ra
+SYM_FUNC_END(__rmemcpy_fast)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
new file mode 100644
index 000000000..e7cb4ea37
--- /dev/null
+++ b/arch/loongarch/lib/memset.S
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.macro fill_to_64 r0
+ bstrins.d \r0, \r0, 15, 8
+ bstrins.d \r0, \r0, 31, 16
+ bstrins.d \r0, \r0, 63, 32
+.endm
+
+SYM_FUNC_START(memset)
+ /*
+ * Some CPUs support hardware unaligned access
+ */
+ ALTERNATIVE "b __memset_generic", \
+ "b __memset_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(memset)
+
+EXPORT_SYMBOL(memset)
+
+/*
+ * void *__memset_generic(void *s, int c, size_t n)
+ *
+ * a0: s
+ * a1: c
+ * a2: n
+ */
+SYM_FUNC_START(__memset_generic)
+ move a3, a0
+ beqz a2, 2f
+
+1: st.b a1, a0, 0
+ addi.d a0, a0, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(__memset_generic)
+
+/*
+ * void *__memset_fast(void *s, int c, size_t n)
+ *
+ * a0: s
+ * a1: c
+ * a2: n
+ */
+SYM_FUNC_START(__memset_fast)
+ move a3, a0
+ beqz a2, 3f
+
+ ori a4, zero, 64
+ blt a2, a4, 2f
+
+ /* fill a1 to 64 bits */
+ fill_to_64 a1
+
+ /* set 64 bytes at a time */
+1: st.d a1, a0, 0
+ st.d a1, a0, 8
+ st.d a1, a0, 16
+ st.d a1, a0, 24
+ st.d a1, a0, 32
+ st.d a1, a0, 40
+ st.d a1, a0, 48
+ st.d a1, a0, 56
+
+ addi.d a0, a0, 64
+ addi.d a2, a2, -64
+ bge a2, a4, 1b
+
+ beqz a2, 3f
+
+ /* set the remaining bytes */
+2: st.b a1, a0, 0
+ addi.d a0, a0, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 2b
+
+ /* return */
+3: move a0, a3
+ jr ra
+SYM_FUNC_END(__memset_fast)
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
new file mode 100644
index 000000000..9177fd638
--- /dev/null
+++ b/arch/loongarch/lib/unaligned.S
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.L_fixup_handle_unaligned:
+ li.w a0, -EFAULT
+ jr ra
+
+/*
+ * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ * a3: sign
+ */
+SYM_FUNC_START(unaligned_read)
+ beqz a2, 5f
+
+ li.w t2, 0
+ addi.d t0, a2, -1
+ slli.d t1, t0, 3
+ add.d a0, a0, t0
+
+ beqz a3, 2f
+1: ld.b t3, a0, 0
+ b 3f
+
+2: ld.bu t3, a0, 0
+3: sll.d t3, t3, t1
+ or t2, t2, t3
+ addi.d t1, t1, -8
+ addi.d a0, a0, -1
+ addi.d a2, a2, -1
+ bgtz a2, 2b
+4: st.d t2, a1, 0
+
+ move a0, a2
+ jr ra
+
+5: li.w a0, -EFAULT
+ jr ra
+
+ _asm_extable 1b, .L_fixup_handle_unaligned
+ _asm_extable 2b, .L_fixup_handle_unaligned
+ _asm_extable 4b, .L_fixup_handle_unaligned
+SYM_FUNC_END(unaligned_read)
+
+/*
+ * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ */
+SYM_FUNC_START(unaligned_write)
+ beqz a2, 3f
+
+ li.w t0, 0
+1: srl.d t1, a1, t0
+2: st.b t1, a0, 0
+ addi.d t0, t0, 8
+ addi.d a2, a2, -1
+ addi.d a0, a0, 1
+ bgtz a2, 1b
+
+ move a0, a2
+ jr ra
+
+3: li.w a0, -EFAULT
+ jr ra
+
+ _asm_extable 2b, .L_fixup_handle_unaligned
+SYM_FUNC_END(unaligned_write)