aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/mce.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /arch/powerpc/kernel/mce.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next (grafted)
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. 
- Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). 
- Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. 
- Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) 
- ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload 
depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'arch/powerpc/kernel/mce.c')
-rw-r--r--arch/powerpc/kernel/mce.c767
1 files changed, 767 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 000000000..6c5d30fba
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Machine check exception handling.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/irq_work.h>
+#include <linux/extable.h>
+#include <linux/ftrace.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/mce.h>
+#include <asm/nmi.h>
+
+#include "setup.h"
+
+/* Forward declarations for the deferred UE (uncorrectable error) path. */
+static void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
+/* Work item whose handler is machine_process_ue_event(). */
+static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
+/* Notifier chain called for every queued UE event by machine_process_ue_event(). */
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+/*
+ * mce_register_notifier - add @nb to the MCE notifier chain.
+ *
+ * Returns the result of blocking_notifier_chain_register().
+ */
+int mce_register_notifier(struct notifier_block *nb)
+{
+	int rc;
+
+	rc = blocking_notifier_chain_register(&mce_notifier_list, nb);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+/*
+ * mce_unregister_notifier - remove @nb from the MCE notifier chain.
+ *
+ * Returns the result of blocking_notifier_chain_unregister().
+ */
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+	int rc;
+
+	rc = blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
+/*
+ * Copy the error type from @mce_err into @mce, together with the
+ * type-specific sub-type for those error types that carry one.
+ * Any other error type (including MCE_ERROR_TYPE_UNKNOWN) only has its
+ * top-level error_type recorded.
+ */
+static void mce_set_error_info(struct machine_check_event *mce,
+			       struct mce_error_info *mce_err)
+{
+	mce->error_type = mce_err->error_type;
+
+	if (mce_err->error_type == MCE_ERROR_TYPE_UE)
+		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+	else if (mce_err->error_type == MCE_ERROR_TYPE_SLB)
+		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+	else if (mce_err->error_type == MCE_ERROR_TYPE_ERAT)
+		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+	else if (mce_err->error_type == MCE_ERROR_TYPE_TLB)
+		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+	else if (mce_err->error_type == MCE_ERROR_TYPE_USER)
+		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+	else if (mce_err->error_type == MCE_ERROR_TYPE_RA)
+		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+	else if (mce_err->error_type == MCE_ERROR_TYPE_LINK)
+		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+	/* Everything else has no sub-type to copy. */
+}
+
+/*
+ * Raise the arch irq-work interrupt and mark local_paca as having MCE work
+ * pending; mce_run_irq_context_handlers() acts on that flag.
+ */
+void mce_irq_work_queue(void)
+{
+ /* Raise decrementer interrupt */
+ arch_irq_work_raise();
+ set_mce_pending_irq_work();
+}
+
+/*
+ * Decode and save high level MCE information into the per-CPU buffer, which
+ * is an array of machine_check_event structures.
+ *
+ * @regs:      register state for the machine check; msr and gpr[3] are
+ *             recorded in the event.
+ * @handled:   non-zero if the error was handled. The event is marked
+ *             recovered only if this is set and MSR_RI is set in regs->msr.
+ * @mce_err:   decoded error information copied into the event.
+ * @nip:       value stored in the event's srr0 field.
+ * @addr:      effective address of the error; 0 means none is recorded.
+ * @phys_addr: physical address for UE errors; ULONG_MAX means none.
+ *
+ * A UE event that carries both an effective and a physical address is also
+ * queued for deferred processing through machine_check_ue_event().
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+		    struct mce_error_info *mce_err,
+		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
+{
+	int index = local_paca->mce_info->mce_nest_count++;
+	struct machine_check_event *mce;
+
+	/*
+	 * Return if we don't have enough space to log mce event.
+	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+	 * the check below will stop buffer overrun.
+	 */
+	if (index >= MAX_MC_EVT)
+		return;
+
+	/* Form the slot pointer only once the index is known to be in range. */
+	mce = &local_paca->mce_info->mce_event[index];
+
+	/* Populate generic machine check info */
+	mce->version = MCE_V1;
+	mce->srr0 = nip;
+	mce->srr1 = regs->msr;
+	mce->gpr3 = regs->gpr[3];
+	mce->in_use = 1;
+	mce->cpu = get_paca()->paca_index;
+
+	/* Mark it recovered if we have handled it and MSR(RI=1). */
+	if (handled && (regs->msr & MSR_RI))
+		mce->disposition = MCE_DISPOSITION_RECOVERED;
+	else
+		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+
+	mce->initiator = mce_err->initiator;
+	mce->severity = mce_err->severity;
+	mce->sync_error = mce_err->sync_error;
+	mce->error_class = mce_err->error_class;
+
+	/*
+	 * Populate the mce error_type and type-specific error_type.
+	 */
+	mce_set_error_info(mce, mce_err);
+	if (mce->error_type == MCE_ERROR_TYPE_UE)
+		mce->u.ue_error.ignore_event = mce_err->ignore_event;
+
+	/* Without an effective address there is nothing more to record. */
+	if (!addr)
+		return;
+
+	switch (mce->error_type) {
+	case MCE_ERROR_TYPE_TLB:
+		mce->u.tlb_error.effective_address_provided = true;
+		mce->u.tlb_error.effective_address = addr;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		mce->u.slb_error.effective_address_provided = true;
+		mce->u.slb_error.effective_address = addr;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		mce->u.erat_error.effective_address_provided = true;
+		mce->u.erat_error.effective_address = addr;
+		break;
+	case MCE_ERROR_TYPE_USER:
+		mce->u.user_error.effective_address_provided = true;
+		mce->u.user_error.effective_address = addr;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		mce->u.ra_error.effective_address_provided = true;
+		mce->u.ra_error.effective_address = addr;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		mce->u.link_error.effective_address_provided = true;
+		mce->u.link_error.effective_address = addr;
+		break;
+	case MCE_ERROR_TYPE_UE:
+		mce->u.ue_error.effective_address_provided = true;
+		mce->u.ue_error.effective_address = addr;
+		if (phys_addr != ULONG_MAX) {
+			mce->u.ue_error.physical_address_provided = true;
+			mce->u.ue_error.physical_address = phys_addr;
+			/* Hand the event to the deferred UE handling path. */
+			machine_check_ue_event(mce);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * get_mce_event - fetch the most recently saved machine check event.
+ *	@mce:		destination for a copy of the event; may be NULL when
+ *			the caller only wants to release the slot.
+ *	@release:	false - keep the slot; the caller is expected to call
+ *				release_mce_event() once it has consumed
+ *				the event.
+ *			true  - mark the slot free and drop the nest count.
+ *
+ * Returns 1 if an event was available, 0 otherwise.
+ *
+ * Used by platform specific machine check handlers and by KVM. The
+ * caller is still in interrupt context when this runs.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+	int slot = local_paca->mce_info->mce_nest_count - 1;
+	int found = 0;
+
+	/* Nothing has been saved. */
+	if (slot < 0)
+		return 0;
+
+	/* A slot at or beyond MAX_MC_EVT was never written by save_mce_event(). */
+	if (slot < MAX_MC_EVT) {
+		struct machine_check_event *saved;
+
+		saved = &local_paca->mce_info->mce_event[slot];
+		if (mce)
+			*mce = *saved;
+		if (release)
+			saved->in_use = 0;
+		found = 1;
+	}
+
+	/* Drop the nest count even when the slot itself was out of range. */
+	if (release)
+		local_paca->mce_info->mce_nest_count--;
+
+	return found;
+}
+
+/* Release the most recent MCE event slot without copying the event out. */
+void release_mce_event(void)
+{
+	(void)get_mce_event(NULL, true);
+}
+
+/* Schedule mce_ue_event_work, whose handler is machine_process_ue_event(). */
+static void machine_check_ue_work(void)
+{
+	(void)schedule_work(&mce_ue_event_work);
+}
+
+/*
+ * Copy a UE event into the per-CPU UE queue so that it can be handled
+ * later by machine_process_ue_event(). The event is dropped if the queue
+ * already holds MAX_MC_EVT entries.
+ */
+static void machine_check_ue_event(struct machine_check_event *evt)
+{
+	int slot = local_paca->mce_info->mce_ue_count++;
+
+	if (slot >= MAX_MC_EVT) {
+		/* Queue is full: undo the increment and drop the event. */
+		local_paca->mce_info->mce_ue_count--;
+		return;
+	}
+
+	memcpy(&local_paca->mce_info->mce_ue_event_queue[slot],
+	       evt, sizeof(*evt));
+
+	/* Request deferred processing of the queued event. */
+	mce_irq_work_queue();
+}
+
+/*
+ * Take the most recent MCE event (releasing its slot) and append a copy to
+ * the per-CPU event queue so that it can be processed later. The event is
+ * dropped if the queue already holds MAX_MC_EVT entries.
+ */
+void machine_check_queue_event(void)
+{
+	struct machine_check_event latest;
+	int slot;
+
+	if (get_mce_event(&latest, MCE_EVENT_RELEASE) == 0)
+		return;
+
+	slot = local_paca->mce_info->mce_queue_count++;
+	if (slot >= MAX_MC_EVT) {
+		/* Queue is full: undo the increment and drop the event. */
+		local_paca->mce_info->mce_queue_count--;
+		return;
+	}
+
+	memcpy(&local_paca->mce_info->mce_event_queue[slot],
+	       &latest, sizeof(latest));
+
+	mce_irq_work_queue();
+}
+
+/*
+ * If regs->nip has an entry in the kernel exception table, flag the error
+ * as one to be ignored by the reporting paths and redirect the return
+ * address to the entry's fixup handler. Otherwise nothing is changed.
+ */
+void mce_common_process_ue(struct pt_regs *regs,
+			   struct mce_error_info *mce_err)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_kernel_exception_table(regs->nip);
+	if (!fixup)
+		return;
+
+	mce_err->ignore_event = true;
+	regs_set_return_ip(regs, extable_fixup(fixup));
+}
+
+/*
+ * Work handler for mce_ue_event_work: drain the per-CPU UE event queue,
+ * most recently queued entry first.
+ *
+ * Every entry is passed to the MCE notifier chain. With
+ * CONFIG_MEMORY_FAILURE, UE entries that are not flagged ignore_event are
+ * then handed to memory_failure() when a physical address is available.
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+	struct machine_check_event *ue;
+	int top;
+
+	while (local_paca->mce_info->mce_ue_count > 0) {
+		top = local_paca->mce_info->mce_ue_count - 1;
+		ue = &local_paca->mce_info->mce_ue_event_queue[top];
+		blocking_notifier_call_chain(&mce_notifier_list, 0, ue);
+#ifdef CONFIG_MEMORY_FAILURE
+		/*
+		 * This should probably be queued elsewhere, but oh well.
+		 *
+		 * An event flagged ignore_event is not reported: the caller
+		 * has a fixup handler which will do the appropriate error
+		 * handling and reporting.
+		 */
+		if (ue->error_type == MCE_ERROR_TYPE_UE &&
+		    !ue->u.ue_error.ignore_event) {
+			if (ue->u.ue_error.physical_address_provided) {
+				unsigned long bad_pfn;
+
+				bad_pfn = ue->u.ue_error.physical_address >>
+					  PAGE_SHIFT;
+				memory_failure(bad_pfn, 0);
+			} else {
+				pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
+			}
+		}
+#endif
+		local_paca->mce_info->mce_ue_count--;
+	}
+}
+/*
+ * Drain the per-CPU MCE event queue, most recently queued entry first,
+ * printing every event except UE events flagged ignore_event. Called from
+ * mce_run_irq_context_handlers(). The kernel is tainted with
+ * TAINT_MACHINE_CHECK on every call.
+ */
+static void machine_check_process_queued_event(void)
+{
+	struct machine_check_event *queued;
+	bool skip;
+	int top;
+
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+	/*
+	 * For now just print it to console.
+	 * TODO: log this error event to FSP or nvram.
+	 */
+	while (local_paca->mce_info->mce_queue_count > 0) {
+		top = local_paca->mce_info->mce_queue_count - 1;
+		queued = &local_paca->mce_info->mce_event_queue[top];
+
+		skip = queued->error_type == MCE_ERROR_TYPE_UE &&
+		       queued->u.ue_error.ignore_event;
+		if (!skip)
+			machine_check_print_event_info(queued, false, false);
+
+		local_paca->mce_info->mce_queue_count--;
+	}
+}
+
+/* Flag local_paca as having MCE irq work pending. */
+void set_mce_pending_irq_work(void)
+{
+ local_paca->mce_pending_irq_work = 1;
+}
+
+/* Clear the MCE irq-work pending flag in local_paca. */
+void clear_mce_pending_irq_work(void)
+{
+ local_paca->mce_pending_irq_work = 0;
+}
+
+/*
+ * Run the deferred MCE handling if local_paca has work pending: call the
+ * platform machine_check_log_err hook (when set), print the queued events,
+ * schedule the UE work item and finally clear the pending flag.
+ */
+void mce_run_irq_context_handlers(void)
+{
+	if (likely(!local_paca->mce_pending_irq_work))
+		return;
+
+	if (ppc_md.machine_check_log_err)
+		ppc_md.machine_check_log_err();
+
+	machine_check_process_queued_event();
+	machine_check_ue_work();
+	clear_mce_pending_irq_work();
+}
+
+/*
+ * Print a human-readable description of a machine check event.
+ *
+ * @evt:       event to describe; anything other than MCE_V1 is reported
+ *             as an unknown version and not decoded further.
+ * @user_mode: when true, the NIP line carries the current task's PID and
+ *             comm instead of a symbolised address.
+ * @in_guest:  when true, the output is tagged "Guest", the NIP line carries
+ *             PID and comm, and the SLB contents are not dumped.
+ */
+void machine_check_print_event_info(struct machine_check_event *evt,
+				    bool user_mode, bool in_guest)
+{
+	const char *level, *sevstr, *subtype, *err_type, *initiator;
+	uint64_t ea = 0, pa = 0;
+	int n = 0;
+	char dar_str[50];
+	char pa_str[50];
+	static const char * const mc_ue_types[] = {
+		"Indeterminate",
+		"Instruction fetch",
+		"Page table walk ifetch",
+		"Load/Store",
+		"Page table walk Load/Store",
+	};
+	static const char * const mc_slb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char * const mc_erat_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char * const mc_tlb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char * const mc_user_types[] = {
+		"Indeterminate",
+		"tlbie(l) invalid",
+		"scv invalid",
+	};
+	static const char * const mc_ra_types[] = {
+		"Indeterminate",
+		"Instruction fetch (bad)",
+		"Instruction fetch (foreign/control memory)",
+		"Page table walk ifetch (bad)",
+		"Page table walk ifetch (foreign/control memory)",
+		"Load (bad)",
+		"Store (bad)",
+		"Page table walk Load/Store (bad)",
+		"Page table walk Load/Store (foreign/control memory)",
+		"Load/Store (foreign/control memory)",
+	};
+	static const char * const mc_link_types[] = {
+		"Indeterminate",
+		"Instruction fetch (timeout)",
+		"Page table walk ifetch (timeout)",
+		"Load (timeout)",
+		"Store (timeout)",
+		"Page table walk Load/Store (timeout)",
+	};
+	static const char * const mc_error_class[] = {
+		"Unknown",
+		"Hardware error",
+		"Probable Hardware error (some chance of software cause)",
+		"Software error",
+		"Probable Software error (some chance of hardware cause)",
+	};
+
+	/* Print things out */
+	if (evt->version != MCE_V1) {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       evt->version);
+		return;
+	}
+
+	/* Severity selects both the printk level and the label. */
+	switch (evt->severity) {
+	case MCE_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case MCE_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "Warning";
+		break;
+	case MCE_SEV_SEVERE:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case MCE_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	switch (evt->initiator) {
+	case MCE_INITIATOR_CPU:
+		initiator = "CPU";
+		break;
+	case MCE_INITIATOR_PCI:
+		initiator = "PCI";
+		break;
+	case MCE_INITIATOR_ISA:
+		initiator = "ISA";
+		break;
+	case MCE_INITIATOR_MEMORY:
+		initiator = "Memory";
+		break;
+	case MCE_INITIATOR_POWERMGM:
+		initiator = "Power Management";
+		break;
+	case MCE_INITIATOR_UNKNOWN:
+	default:
+		initiator = "Unknown";
+		break;
+	}
+
+	/*
+	 * Decode the error type, its sub-type (bounds-checked against the
+	 * matching table) and whichever addresses the event provides.
+	 */
+	switch (evt->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		err_type = "UE";
+		subtype = evt->u.ue_error.ue_error_type <
+			ARRAY_SIZE(mc_ue_types) ?
+			mc_ue_types[evt->u.ue_error.ue_error_type]
+			: "Unknown";
+		if (evt->u.ue_error.effective_address_provided)
+			ea = evt->u.ue_error.effective_address;
+		if (evt->u.ue_error.physical_address_provided)
+			pa = evt->u.ue_error.physical_address;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		err_type = "SLB";
+		subtype = evt->u.slb_error.slb_error_type <
+			ARRAY_SIZE(mc_slb_types) ?
+			mc_slb_types[evt->u.slb_error.slb_error_type]
+			: "Unknown";
+		if (evt->u.slb_error.effective_address_provided)
+			ea = evt->u.slb_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		err_type = "ERAT";
+		subtype = evt->u.erat_error.erat_error_type <
+			ARRAY_SIZE(mc_erat_types) ?
+			mc_erat_types[evt->u.erat_error.erat_error_type]
+			: "Unknown";
+		if (evt->u.erat_error.effective_address_provided)
+			ea = evt->u.erat_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		err_type = "TLB";
+		subtype = evt->u.tlb_error.tlb_error_type <
+			ARRAY_SIZE(mc_tlb_types) ?
+			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+			: "Unknown";
+		if (evt->u.tlb_error.effective_address_provided)
+			ea = evt->u.tlb_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_USER:
+		err_type = "User";
+		subtype = evt->u.user_error.user_error_type <
+			ARRAY_SIZE(mc_user_types) ?
+			mc_user_types[evt->u.user_error.user_error_type]
+			: "Unknown";
+		if (evt->u.user_error.effective_address_provided)
+			ea = evt->u.user_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_RA:
+		err_type = "Real address";
+		subtype = evt->u.ra_error.ra_error_type <
+			ARRAY_SIZE(mc_ra_types) ?
+			mc_ra_types[evt->u.ra_error.ra_error_type]
+			: "Unknown";
+		if (evt->u.ra_error.effective_address_provided)
+			ea = evt->u.ra_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_LINK:
+		err_type = "Link";
+		subtype = evt->u.link_error.link_error_type <
+			ARRAY_SIZE(mc_link_types) ?
+			mc_link_types[evt->u.link_error.link_error_type]
+			: "Unknown";
+		if (evt->u.link_error.effective_address_provided)
+			ea = evt->u.link_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_DCACHE:
+		err_type = "D-Cache";
+		subtype = "Unknown";
+		break;
+	case MCE_ERROR_TYPE_ICACHE:
+		err_type = "I-Cache";
+		subtype = "Unknown";
+		break;
+	case MCE_ERROR_TYPE_UNKNOWN:
+	default:
+		err_type = "Unknown";
+		subtype = "";
+		break;
+	}
+
+	/*
+	 * Build the address strings. Writes are bounded by the buffer sizes
+	 * so a future format change cannot overrun the stack buffers.
+	 */
+	dar_str[0] = pa_str[0] = '\0';
+	if (ea && evt->srr0 != ea) {
+		/* Load/Store address */
+		n = snprintf(dar_str, sizeof(dar_str), "DAR: %016llx ", ea);
+		if (pa)
+			snprintf(dar_str + n, sizeof(dar_str) - n,
+				 "paddr: %016llx ", pa);
+	} else if (pa) {
+		snprintf(pa_str, sizeof(pa_str), " paddr: %016llx", pa);
+	}
+
+	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+	       level, evt->cpu, sevstr, in_guest ? "Guest" : "",
+	       err_type, subtype, dar_str,
+	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
+	       "Recovered" : "Not recovered");
+
+	if (in_guest || user_mode) {
+		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+		       level, evt->cpu, current->pid, current->comm,
+		       in_guest ? "Guest " : "", evt->srr0, pa_str);
+	} else {
+		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+	}
+
+	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
+	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+		mc_error_class[evt->error_class] : "Unknown";
+	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/* Display faulty slb contents for SLB errors. */
+	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
+		slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
+}
+EXPORT_SYMBOL_GPL(machine_check_print_event_info);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain srr0 and srr1.
+ *
+ * Returns the result of the platform machine_check_early hook, or 0 when
+ * the platform does not provide one.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
+{
+	hv_nmi_check_nonrecoverable(regs);
+
+	/* Without a platform hook the machine check is reported as unhandled. */
+	if (!ppc_md.machine_check_early)
+		return 0;
+
+	return ppc_md.machine_check_early(regs);
+}
+
+/*
+ * Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9.
+ * Set by init_debug_trig_function() and read by hmi_handle_debugtrig().
+ */
+static enum {
+ DTRIG_UNKNOWN,
+ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
+ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+/*
+ * Initcall that decides what the HMER debug trigger is used for.
+ *
+ * The "ibm,hmi-special-triggers" property of this CPU's device tree node
+ * is consulted first; only if that property was not found is the choice
+ * derived from the PVR. Always returns 0.
+ */
+static int init_debug_trig_function(void)
+{
+	struct device_node *cpu_node;
+	struct property *trig_prop = NULL;
+	const char *trig_name;
+	int pvr;
+
+	/* First look in the device tree */
+	preempt_disable();
+	cpu_node = of_get_cpu_node(smp_processor_id(), NULL);
+	if (cpu_node) {
+		of_property_for_each_string(cpu_node, "ibm,hmi-special-triggers",
+					    trig_prop, trig_name) {
+			if (!strcmp(trig_name, "bit17-vector-ci-load"))
+				hmer_debug_trig_function = DTRIG_VECTOR_CI;
+			else if (!strcmp(trig_name, "bit17-tm-suspend-escape"))
+				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+		}
+		of_node_put(cpu_node);
+	}
+	preempt_enable();
+
+	/* If we found the property, don't look at PVR */
+	if (!trig_prop) {
+		pvr = mfspr(SPRN_PVR);
+		/* Check for POWER9 Nimbus (scale-out) */
+		if (PVR_VER(pvr) == PVR_POWER9 && !(pvr & 0xe000)) {
+			int revision = pvr & 0xfff;
+
+			/* DD2.2 and later */
+			if (revision >= 0x202)
+				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+			/* DD2.0 and DD2.1 - used for vector CI load emulation */
+			else if (revision >= 0x200)
+				hmer_debug_trig_function = DTRIG_VECTOR_CI;
+		}
+	}
+
+	if (hmer_debug_trig_function == DTRIG_VECTOR_CI)
+		pr_debug("HMI debug trigger used for vector CI load\n");
+	else if (hmer_debug_trig_function == DTRIG_SUSPEND_ESCAPE)
+		pr_debug("HMI debug trigger used for TM suspend escape\n");
+
+	return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ *
+ * @regs may be NULL; the emulation flag is only set when @regs is given
+ * and describes user mode.
+ *
+ * Return values:
+ *	-1 means this is not a HMI cause that we know about
+ *	 0 means no further handling is required
+ *	 1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+	unsigned long hmer = mfspr(SPRN_HMER);
+	long ret = 0;
+
+	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+	if (!(hmer & HMER_DEBUG_TRIG) ||
+	    hmer_debug_trig_function == DTRIG_UNKNOWN)
+		return -1;
+
+	hmer &= ~HMER_DEBUG_TRIG;
+	/* HMER is a write-AND register */
+	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+	/*
+	 * Now to avoid problems with soft-disable we only do the
+	 * emulation if we are coming from host user space.
+	 */
+	if (hmer_debug_trig_function == DTRIG_VECTOR_CI &&
+	    regs && user_mode(regs)) {
+		local_paca->hmi_p9_special_emu = 1;
+		ret = 1;
+	}
+
+	/*
+	 * See if any other HMI causes remain to be handled
+	 */
+	if (hmer & mfspr(SPRN_HMEER))
+		return -1;
+
+	return ret;
+}
+
+/*
+ * HMI exception handler. Counts the HMI in local_paca->hmi_irqs.
+ *
+ * Return values:
+ *	the result of hmi_handle_debugtrig() when that is non-negative
+ *	(0 or 1); otherwise 1, after waiting for subcore guest exit,
+ *	calling the platform hmi_exception_early hook (if set) and
+ *	waiting for timebase resync.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
+{
+ int ret;
+
+ local_paca->hmi_irqs++;
+
+ ret = hmi_handle_debugtrig(regs);
+ if (ret >= 0)
+ return ret;
+
+ wait_for_subcore_guest_exit();
+
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(regs);
+
+ wait_for_tb_resync();
+
+ return 1;
+}
+
+/*
+ * Allocate one struct mce_info per possible CPU from memblock, below the
+ * smaller of ppc64_bolted_size() and ppc64_rma_size, and attach it to that
+ * CPU's paca. Panics if any allocation fails.
+ */
+void __init mce_init(void)
+{
+	u64 upper = min(ppc64_bolted_size(), ppc64_rma_size);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct mce_info *info;
+
+		info = memblock_alloc_try_nid(sizeof(*info),
+					      __alignof__(*info),
+					      MEMBLOCK_LOW_LIMIT,
+					      upper, early_cpu_to_node(cpu));
+		if (!info)
+			panic("Failed to allocate memory for MCE event data\n");
+
+		paca_ptrs[cpu]->mce_info = info;
+	}
+}