aboutsummaryrefslogtreecommitdiff
path: root/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c')
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c1465
1 files changed, 1465 insertions, 0 deletions
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
new file mode 100644
index 000000000..0bba3b05c
--- /dev/null
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -0,0 +1,1465 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, HiSilicon Ltd.
+ */
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/hisi_acc_qm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/anon_inodes.h>
+
+#include "hisi_acc_vfio_pci.h"
+
+/* Return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
+static int qm_wait_dev_not_ready(struct hisi_qm *qm)
+{
+ u32 val;
+
+ return readl_relaxed_poll_timeout(qm->io_base + QM_VF_STATE,
+ val, !(val & 0x1), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US);
+}
+
+/*
+ * Each state Reg is checked 100 times,
+ * with a delay of 100 microseconds after each check
+ */
+static u32 qm_check_reg_state(struct hisi_qm *qm, u32 regs)
+{
+ int check_times = 0;
+ u32 state;
+
+ state = readl(qm->io_base + regs);
+ while (state && check_times < ERROR_CHECK_TIMEOUT) {
+ udelay(CHECK_DELAY_TIME);
+ state = readl(qm->io_base + regs);
+ check_times++;
+ }
+
+ return state;
+}
+
+static int qm_read_regs(struct hisi_qm *qm, u32 reg_addr,
+ u32 *data, u8 nums)
+{
+ int i;
+
+ if (nums < 1 || nums > QM_REGS_MAX_LEN)
+ return -EINVAL;
+
+ for (i = 0; i < nums; i++) {
+ data[i] = readl(qm->io_base + reg_addr);
+ reg_addr += QM_REG_ADDR_OFFSET;
+ }
+
+ return 0;
+}
+
+static int qm_write_regs(struct hisi_qm *qm, u32 reg,
+ u32 *data, u8 nums)
+{
+ int i;
+
+ if (nums < 1 || nums > QM_REGS_MAX_LEN)
+ return -EINVAL;
+
+ for (i = 0; i < nums; i++)
+ writel(data[i], qm->io_base + reg + i * QM_REG_ADDR_OFFSET);
+
+ return 0;
+}
+
+static int qm_get_vft(struct hisi_qm *qm, u32 *base)
+{
+ u64 sqc_vft;
+ u32 qp_num;
+ int ret;
+
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+ if (ret)
+ return ret;
+
+ sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+ QM_XQC_ADDR_OFFSET);
+ *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+ qp_num = (QM_SQC_VFT_NUM_MASK_V2 &
+ (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+ return qp_num;
+}
+
+static int qm_get_sqc(struct hisi_qm *qm, u64 *addr)
+{
+ int ret;
+
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, 0, 0, 1);
+ if (ret)
+ return ret;
+
+ *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+ QM_XQC_ADDR_OFFSET);
+
+ return 0;
+}
+
+static int qm_get_cqc(struct hisi_qm *qm, u64 *addr)
+{
+ int ret;
+
+ ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, 0, 0, 1);
+ if (ret)
+ return ret;
+
+ *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) <<
+ QM_XQC_ADDR_OFFSET);
+
+ return 0;
+}
+
+static int qm_get_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ ret = qm_read_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_VF_AEQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_read_regs(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_VF_EQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_read_regs(qm, QM_IFC_INT_SOURCE_V,
+ &vf_data->ifc_int_source, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_IFC_INT_SOURCE_V\n");
+ return ret;
+ }
+
+ ret = qm_read_regs(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_IFC_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_read_regs(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_IFC_INT_SET_V\n");
+ return ret;
+ }
+
+ ret = qm_read_regs(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_PAGE_SIZE\n");
+ return ret;
+ }
+
+ /* QM_EQC_DW has 7 regs */
+ ret = qm_read_regs(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to read QM_EQC_DW\n");
+ return ret;
+ }
+
+ /* QM_AEQC_DW has 7 regs */
+ ret = qm_read_regs(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to read QM_AEQC_DW\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int qm_set_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
+{
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ /* Check VF state */
+ if (unlikely(hisi_qm_wait_mb_ready(qm))) {
+ dev_err(&qm->pdev->dev, "QM device is not ready to write\n");
+ return -EBUSY;
+ }
+
+ ret = qm_write_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_AEQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_write_regs(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_EQ_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_write_regs(qm, QM_IFC_INT_SOURCE_V,
+ &vf_data->ifc_int_source, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_IFC_INT_SOURCE_V\n");
+ return ret;
+ }
+
+ ret = qm_write_regs(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_IFC_INT_MASK\n");
+ return ret;
+ }
+
+ ret = qm_write_regs(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_IFC_INT_SET_V\n");
+ return ret;
+ }
+
+ ret = qm_write_regs(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_QUE_ISO_CFG_V\n");
+ return ret;
+ }
+
+ ret = qm_write_regs(qm, QM_PAGE_SIZE, &vf_data->page_size, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_PAGE_SIZE\n");
+ return ret;
+ }
+
+ /* QM_EQC_DW has 7 regs */
+ ret = qm_write_regs(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to write QM_EQC_DW\n");
+ return ret;
+ }
+
+ /* QM_AEQC_DW has 7 regs */
+ ret = qm_write_regs(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7);
+ if (ret) {
+ dev_err(dev, "failed to write QM_AEQC_DW\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd,
+ u16 index, u8 priority)
+{
+ u64 doorbell;
+ u64 dbase;
+ u16 randata = 0;
+
+ if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ)
+ dbase = QM_DOORBELL_SQ_CQ_BASE_V2;
+ else
+ dbase = QM_DOORBELL_EQ_AEQ_BASE_V2;
+
+ doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) |
+ ((u64)randata << QM_DB_RAND_SHIFT_V2) |
+ ((u64)index << QM_DB_INDEX_SHIFT_V2) |
+ ((u64)priority << QM_DB_PRIORITY_SHIFT_V2);
+
+ writeq(doorbell, qm->io_base + dbase);
+}
+
+static int pf_qm_get_qp_num(struct hisi_qm *qm, int vf_id, u32 *rbase)
+{
+ unsigned int val;
+ u64 sqc_vft;
+ u32 qp_num;
+ int ret;
+
+ ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+ val & BIT(0), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR);
+ /* 0 mean SQC VFT */
+ writel(0x0, qm->io_base + QM_VFT_CFG_TYPE);
+ writel(vf_id, qm->io_base + QM_VFT_CFG);
+
+ writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+ writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+ ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+ val & BIT(0), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US);
+ if (ret)
+ return ret;
+
+ sqc_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) |
+ ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) <<
+ QM_XQC_ADDR_OFFSET);
+ *rbase = QM_SQC_VFT_BASE_MASK_V2 &
+ (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+ qp_num = (QM_SQC_VFT_NUM_MASK_V2 &
+ (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+ return qp_num;
+}
+
+static void qm_dev_cmd_init(struct hisi_qm *qm)
+{
+ /* Clear VF communication status registers. */
+ writel(0x1, qm->io_base + QM_IFC_INT_SOURCE_V);
+
+ /* Enable pf and vf communication. */
+ writel(0x0, qm->io_base + QM_IFC_INT_MASK);
+}
+
+static int vf_qm_cache_wb(struct hisi_qm *qm)
+{
+ unsigned int val;
+
+ writel(0x1, qm->io_base + QM_CACHE_WB_START);
+ if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+ val, val & BIT(0), MB_POLL_PERIOD_US,
+ MB_POLL_TIMEOUT_US)) {
+ dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vf_qm_fun_reset(struct hisi_qm *qm)
+{
+ int i;
+
+ for (i = 0; i < qm->qp_num; i++)
+ qm_db(qm, i, QM_DOORBELL_CMD_SQ, 0, 1);
+}
+
+static int vf_qm_func_stop(struct hisi_qm *qm)
+{
+ return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
+}
+
+static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+ struct hisi_acc_vf_migration_file *migf)
+{
+ struct acc_vf_data *vf_data = &migf->vf_data;
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
+ struct device *dev = &vf_qm->pdev->dev;
+ u32 que_iso_state;
+ int ret;
+
+ if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
+ return 0;
+
+ if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+ dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
+ return -EINVAL;
+ }
+
+ if (vf_data->dev_id != hisi_acc_vdev->vf_dev->device) {
+ dev_err(dev, "failed to match VF devices\n");
+ return -EINVAL;
+ }
+
+ /* VF qp num check */
+ ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
+ if (ret <= 0) {
+ dev_err(dev, "failed to get vft qp nums\n");
+ return -EINVAL;
+ }
+
+ if (ret != vf_data->qp_num) {
+ dev_err(dev, "failed to match VF qp num\n");
+ return -EINVAL;
+ }
+
+ vf_qm->qp_num = ret;
+
+ /* VF isolation state check */
+ ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_QUE_ISO_CFG_V\n");
+ return ret;
+ }
+
+ if (vf_data->que_iso_cfg != que_iso_state) {
+ dev_err(dev, "failed to match isolation state\n");
+ return -EINVAL;
+ }
+
+ ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
+ if (ret) {
+ dev_err(dev, "failed to write QM_VF_STATE\n");
+ return ret;
+ }
+
+ hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+ hisi_acc_vdev->match_done = true;
+ return 0;
+}
+
+static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+ struct acc_vf_data *vf_data)
+{
+ struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
+ struct device *dev = &pf_qm->pdev->dev;
+ int vf_id = hisi_acc_vdev->vf_id;
+ int ret;
+
+ vf_data->acc_magic = ACC_DEV_MAGIC;
+ /* Save device id */
+ vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
+
+ /* VF qp num save from PF */
+ ret = pf_qm_get_qp_num(pf_qm, vf_id, &vf_data->qp_base);
+ if (ret <= 0) {
+ dev_err(dev, "failed to get vft qp nums!\n");
+ return -EINVAL;
+ }
+
+ vf_data->qp_num = ret;
+
+ /* VF isolation state save from PF */
+ ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
+ if (ret) {
+ dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+ struct hisi_acc_vf_migration_file *migf)
+{
+ struct hisi_qm *qm = &hisi_acc_vdev->vf_qm;
+ struct device *dev = &qm->pdev->dev;
+ struct acc_vf_data *vf_data = &migf->vf_data;
+ int ret;
+
+ /* Return if only match data was transferred */
+ if (migf->total_length == QM_MATCH_SIZE)
+ return 0;
+
+ if (migf->total_length < sizeof(struct acc_vf_data))
+ return -EINVAL;
+
+ qm->eqe_dma = vf_data->eqe_dma;
+ qm->aeqe_dma = vf_data->aeqe_dma;
+ qm->sqc_dma = vf_data->sqc_dma;
+ qm->cqc_dma = vf_data->cqc_dma;
+
+ qm->qp_base = vf_data->qp_base;
+ qm->qp_num = vf_data->qp_num;
+
+ ret = qm_set_regs(qm, vf_data);
+ if (ret) {
+ dev_err(dev, "set VF regs failed\n");
+ return ret;
+ }
+
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+ if (ret) {
+ dev_err(dev, "set sqc failed\n");
+ return ret;
+ }
+
+ ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+ if (ret) {
+ dev_err(dev, "set cqc failed\n");
+ return ret;
+ }
+
+ qm_dev_cmd_init(qm);
+ return 0;
+}
+
+static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+ struct hisi_acc_vf_migration_file *migf)
+{
+ struct acc_vf_data *vf_data = &migf->vf_data;
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ struct device *dev = &vf_qm->pdev->dev;
+ int ret;
+
+ if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
+ /* Update state and return with match data */
+ vf_data->vf_qm_state = QM_NOT_READY;
+ hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+ migf->total_length = QM_MATCH_SIZE;
+ return 0;
+ }
+
+ vf_data->vf_qm_state = QM_READY;
+ hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+
+ ret = vf_qm_cache_wb(vf_qm);
+ if (ret) {
+ dev_err(dev, "failed to writeback QM Cache!\n");
+ return ret;
+ }
+
+ ret = qm_get_regs(vf_qm, vf_data);
+ if (ret)
+ return -EINVAL;
+
+ /* Every reg is 32 bit, the dma address is 64 bit. */
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
+ vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
+ vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
+
+ /* Through SQC_BT/CQC_BT to get sqc and cqc address */
+ ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
+ if (ret) {
+ dev_err(dev, "failed to read SQC addr!\n");
+ return -EINVAL;
+ }
+
+ ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
+ if (ret) {
+ dev_err(dev, "failed to read CQC addr!\n");
+ return -EINVAL;
+ }
+
+ migf->total_length = sizeof(struct acc_vf_data);
+ return 0;
+}
+
+static struct hisi_acc_vf_core_device *hisi_acc_drvdata(struct pci_dev *pdev)
+{
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
+
+ return container_of(core_device, struct hisi_acc_vf_core_device,
+ core_device);
+}
+
+/* Check the PF's RAS state and Function INT state */
+static int
+hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct hisi_qm *vfqm = &hisi_acc_vdev->vf_qm;
+ struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
+ struct pci_dev *vf_pdev = hisi_acc_vdev->vf_dev;
+ struct device *dev = &qm->pdev->dev;
+ u32 state;
+
+ /* Check RAS state */
+ state = qm_check_reg_state(qm, QM_ABNORMAL_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM RAS state!\n");
+ return -EBUSY;
+ }
+
+ /* Check Function Communication state between PF and VF */
+ state = qm_check_reg_state(vfqm, QM_IFC_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM IFC INT state!\n");
+ return -EBUSY;
+ }
+ state = qm_check_reg_state(vfqm, QM_IFC_INT_SET_V);
+ if (state) {
+ dev_err(dev, "failed to check QM IFC INT SET state!\n");
+ return -EBUSY;
+ }
+
+ /* Check submodule task state */
+ switch (vf_pdev->device) {
+ case PCI_DEVICE_ID_HUAWEI_SEC_VF:
+ state = qm_check_reg_state(qm, SEC_CORE_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM SEC Core INT state!\n");
+ return -EBUSY;
+ }
+ return 0;
+ case PCI_DEVICE_ID_HUAWEI_HPRE_VF:
+ state = qm_check_reg_state(qm, HPRE_HAC_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM HPRE HAC INT state!\n");
+ return -EBUSY;
+ }
+ return 0;
+ case PCI_DEVICE_ID_HUAWEI_ZIP_VF:
+ state = qm_check_reg_state(qm, HZIP_CORE_INT_STATUS);
+ if (state) {
+ dev_err(dev, "failed to check QM ZIP Core INT state!\n");
+ return -EBUSY;
+ }
+ return 0;
+ default:
+ dev_err(dev, "failed to detect acc module type!\n");
+ return -EINVAL;
+ }
+}
+
+static void hisi_acc_vf_disable_fd(struct hisi_acc_vf_migration_file *migf)
+{
+ mutex_lock(&migf->lock);
+ migf->disabled = true;
+ migf->total_length = 0;
+ migf->filp->f_pos = 0;
+ mutex_unlock(&migf->lock);
+}
+
+static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ if (hisi_acc_vdev->resuming_migf) {
+ hisi_acc_vf_disable_fd(hisi_acc_vdev->resuming_migf);
+ fput(hisi_acc_vdev->resuming_migf->filp);
+ hisi_acc_vdev->resuming_migf = NULL;
+ }
+
+ if (hisi_acc_vdev->saving_migf) {
+ hisi_acc_vf_disable_fd(hisi_acc_vdev->saving_migf);
+ fput(hisi_acc_vdev->saving_migf->filp);
+ hisi_acc_vdev->saving_migf = NULL;
+ }
+}
+
+/*
+ * This function is called in all state_mutex unlock cases to
+ * handle a 'deferred_reset' if exists.
+ */
+static void
+hisi_acc_vf_state_mutex_unlock(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+again:
+ spin_lock(&hisi_acc_vdev->reset_lock);
+ if (hisi_acc_vdev->deferred_reset) {
+ hisi_acc_vdev->deferred_reset = false;
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+ hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+ hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
+ goto again;
+ }
+ mutex_unlock(&hisi_acc_vdev->state_mutex);
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+}
+
+static void hisi_acc_vf_start_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+
+ if (hisi_acc_vdev->vf_qm_state != QM_READY)
+ return;
+
+ /* Make sure the device is enabled */
+ qm_dev_cmd_init(vf_qm);
+
+ vf_qm_fun_reset(vf_qm);
+}
+
+static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct device *dev = &hisi_acc_vdev->vf_dev->dev;
+ struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf;
+ int ret;
+
+ /* Recover data to VF */
+ ret = vf_qm_load_data(hisi_acc_vdev, migf);
+ if (ret) {
+ dev_err(dev, "failed to recover the VF!\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hisi_acc_vf_release_file(struct inode *inode, struct file *filp)
+{
+ struct hisi_acc_vf_migration_file *migf = filp->private_data;
+
+ hisi_acc_vf_disable_fd(migf);
+ mutex_destroy(&migf->lock);
+ kfree(migf);
+ return 0;
+}
+
+static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct hisi_acc_vf_migration_file *migf = filp->private_data;
+ loff_t requested_length;
+ ssize_t done = 0;
+ int ret;
+
+ if (pos)
+ return -ESPIPE;
+ pos = &filp->f_pos;
+
+ if (*pos < 0 ||
+ check_add_overflow((loff_t)len, *pos, &requested_length))
+ return -EINVAL;
+
+ if (requested_length > sizeof(struct acc_vf_data))
+ return -ENOMEM;
+
+ mutex_lock(&migf->lock);
+ if (migf->disabled) {
+ done = -ENODEV;
+ goto out_unlock;
+ }
+
+ ret = copy_from_user(&migf->vf_data, buf, len);
+ if (ret) {
+ done = -EFAULT;
+ goto out_unlock;
+ }
+ *pos += len;
+ done = len;
+ migf->total_length += len;
+
+ ret = vf_qm_check_match(migf->hisi_acc_vdev, migf);
+ if (ret)
+ done = -EFAULT;
+out_unlock:
+ mutex_unlock(&migf->lock);
+ return done;
+}
+
+static const struct file_operations hisi_acc_vf_resume_fops = {
+ .owner = THIS_MODULE,
+ .write = hisi_acc_vf_resume_write,
+ .release = hisi_acc_vf_release_file,
+ .llseek = no_llseek,
+};
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct hisi_acc_vf_migration_file *migf;
+
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ if (!migf)
+ return ERR_PTR(-ENOMEM);
+
+ migf->filp = anon_inode_getfile("hisi_acc_vf_mig", &hisi_acc_vf_resume_fops, migf,
+ O_WRONLY);
+ if (IS_ERR(migf->filp)) {
+ int err = PTR_ERR(migf->filp);
+
+ kfree(migf);
+ return ERR_PTR(err);
+ }
+
+ stream_open(migf->filp->f_inode, migf->filp);
+ mutex_init(&migf->lock);
+ migf->hisi_acc_vdev = hisi_acc_vdev;
+ return migf;
+}
+
+static long hisi_acc_vf_precopy_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct hisi_acc_vf_migration_file *migf = filp->private_data;
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev;
+ loff_t *pos = &filp->f_pos;
+ struct vfio_precopy_info info;
+ unsigned long minsz;
+ int ret;
+
+ if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+ return -ENOTTY;
+
+ minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ mutex_lock(&hisi_acc_vdev->state_mutex);
+ if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) {
+ mutex_unlock(&hisi_acc_vdev->state_mutex);
+ return -EINVAL;
+ }
+
+ mutex_lock(&migf->lock);
+
+ if (migf->disabled) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (*pos > migf->total_length) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ info.dirty_bytes = 0;
+ info.initial_bytes = migf->total_length - *pos;
+
+ ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+out:
+ mutex_unlock(&migf->lock);
+ mutex_unlock(&hisi_acc_vdev->state_mutex);
+ return ret;
+}
+
+static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *pos)
+{
+ struct hisi_acc_vf_migration_file *migf = filp->private_data;
+ ssize_t done = 0;
+ int ret;
+
+ if (pos)
+ return -ESPIPE;
+ pos = &filp->f_pos;
+
+ mutex_lock(&migf->lock);
+ if (*pos > migf->total_length) {
+ done = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (migf->disabled) {
+ done = -ENODEV;
+ goto out_unlock;
+ }
+
+ len = min_t(size_t, migf->total_length - *pos, len);
+ if (len) {
+ ret = copy_to_user(buf, &migf->vf_data, len);
+ if (ret) {
+ done = -EFAULT;
+ goto out_unlock;
+ }
+ *pos += len;
+ done = len;
+ }
+out_unlock:
+ mutex_unlock(&migf->lock);
+ return done;
+}
+
+static const struct file_operations hisi_acc_vf_save_fops = {
+ .owner = THIS_MODULE,
+ .read = hisi_acc_vf_save_read,
+ .unlocked_ioctl = hisi_acc_vf_precopy_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .release = hisi_acc_vf_release_file,
+ .llseek = no_llseek,
+};
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct hisi_acc_vf_migration_file *migf;
+ int ret;
+
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ if (!migf)
+ return ERR_PTR(-ENOMEM);
+
+ migf->filp = anon_inode_getfile("hisi_acc_vf_mig", &hisi_acc_vf_save_fops, migf,
+ O_RDONLY);
+ if (IS_ERR(migf->filp)) {
+ int err = PTR_ERR(migf->filp);
+
+ kfree(migf);
+ return ERR_PTR(err);
+ }
+
+ stream_open(migf->filp->f_inode, migf->filp);
+ mutex_init(&migf->lock);
+ migf->hisi_acc_vdev = hisi_acc_vdev;
+
+ ret = vf_qm_get_match_data(hisi_acc_vdev, &migf->vf_data);
+ if (ret) {
+ fput(migf->filp);
+ return ERR_PTR(ret);
+ }
+
+ return migf;
+}
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_pre_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct hisi_acc_vf_migration_file *migf;
+
+ migf = hisi_acc_open_saving_migf(hisi_acc_vdev);
+ if (IS_ERR(migf))
+ return migf;
+
+ migf->total_length = QM_MATCH_SIZE;
+ return migf;
+}
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev, bool open)
+{
+ int ret;
+ struct hisi_acc_vf_migration_file *migf = NULL;
+
+ if (open) {
+ /*
+ * Userspace didn't use PRECOPY support. Hence saving_migf
+ * is not opened yet.
+ */
+ migf = hisi_acc_open_saving_migf(hisi_acc_vdev);
+ if (IS_ERR(migf))
+ return migf;
+ } else {
+ migf = hisi_acc_vdev->saving_migf;
+ }
+
+ ret = vf_qm_state_save(hisi_acc_vdev, migf);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return open ? migf : NULL;
+}
+
+static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct device *dev = &hisi_acc_vdev->vf_dev->dev;
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ int ret;
+
+ ret = vf_qm_func_stop(vf_qm);
+ if (ret) {
+ dev_err(dev, "failed to stop QM VF function!\n");
+ return ret;
+ }
+
+ ret = hisi_acc_check_int_state(hisi_acc_vdev);
+ if (ret) {
+ dev_err(dev, "failed to check QM INT state!\n");
+ return ret;
+ }
+ return 0;
+}
+
+static struct file *
+hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev,
+ u32 new)
+{
+ u32 cur = hisi_acc_vdev->mig_state;
+ int ret;
+
+ if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) {
+ struct hisi_acc_vf_migration_file *migf;
+
+ migf = hisi_acc_vf_pre_copy(hisi_acc_vdev);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ get_file(migf->filp);
+ hisi_acc_vdev->saving_migf = migf;
+ return migf->filp;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_STOP_COPY) {
+ struct hisi_acc_vf_migration_file *migf;
+
+ ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
+ if (ret)
+ return ERR_PTR(ret);
+
+ migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, false);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) {
+ ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
+ if (ret)
+ return ERR_PTR(ret);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
+ struct hisi_acc_vf_migration_file *migf;
+
+ migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, true);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ get_file(migf->filp);
+ hisi_acc_vdev->saving_migf = migf;
+ return migf->filp;
+ }
+
+ if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
+ struct hisi_acc_vf_migration_file *migf;
+
+ migf = hisi_acc_vf_pci_resume(hisi_acc_vdev);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ get_file(migf->filp);
+ hisi_acc_vdev->resuming_migf = migf;
+ return migf->filp;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
+ ret = hisi_acc_vf_load_state(hisi_acc_vdev);
+ if (ret)
+ return ERR_PTR(ret);
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) {
+ hisi_acc_vf_disable_fds(hisi_acc_vdev);
+ return NULL;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) {
+ hisi_acc_vf_start_device(hisi_acc_vdev);
+ return NULL;
+ }
+
+ /*
+ * vfio_mig_get_next_state() does not use arcs other than the above
+ */
+ WARN_ON(true);
+ return ERR_PTR(-EINVAL);
+}
+
+static struct file *
+hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
+ enum vfio_device_mig_state new_state)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
+ struct hisi_acc_vf_core_device, core_device.vdev);
+ enum vfio_device_mig_state next_state;
+ struct file *res = NULL;
+ int ret;
+
+ mutex_lock(&hisi_acc_vdev->state_mutex);
+ while (new_state != hisi_acc_vdev->mig_state) {
+ ret = vfio_mig_get_next_state(vdev,
+ hisi_acc_vdev->mig_state,
+ new_state, &next_state);
+ if (ret) {
+ res = ERR_PTR(-EINVAL);
+ break;
+ }
+
+ res = hisi_acc_vf_set_device_state(hisi_acc_vdev, next_state);
+ if (IS_ERR(res))
+ break;
+ hisi_acc_vdev->mig_state = next_state;
+ if (WARN_ON(res && new_state != hisi_acc_vdev->mig_state)) {
+ fput(res);
+ res = ERR_PTR(-EINVAL);
+ break;
+ }
+ }
+ hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+ return res;
+}
+
+static int
+hisi_acc_vfio_pci_get_data_size(struct vfio_device *vdev,
+ unsigned long *stop_copy_length)
+{
+ *stop_copy_length = sizeof(struct acc_vf_data);
+ return 0;
+}
+
+static int
+hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
+ enum vfio_device_mig_state *curr_state)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(vdev,
+ struct hisi_acc_vf_core_device, core_device.vdev);
+
+ mutex_lock(&hisi_acc_vdev->state_mutex);
+ *curr_state = hisi_acc_vdev->mig_state;
+ hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+ return 0;
+}
+
+static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+
+ if (hisi_acc_vdev->core_device.vdev.migration_flags !=
+ VFIO_MIGRATION_STOP_COPY)
+ return;
+
+ /*
+ * As the higher VFIO layers are holding locks across reset and using
+ * those same locks with the mm_lock we need to prevent ABBA deadlock
+ * with the state_mutex and mm_lock.
+ * In case the state_mutex was taken already we defer the cleanup work
+ * to the unlock flow of the other running context.
+ */
+ spin_lock(&hisi_acc_vdev->reset_lock);
+ hisi_acc_vdev->deferred_reset = true;
+ if (!mutex_trylock(&hisi_acc_vdev->state_mutex)) {
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+ return;
+ }
+ spin_unlock(&hisi_acc_vdev->reset_lock);
+ hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+}
+
+static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+ struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device;
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+ struct pci_dev *vf_dev = vdev->pdev;
+
+ /*
+ * ACC VF dev BAR2 region consists of both functional register space
+ * and migration control register space. For migration to work, we
+ * need access to both. Hence, we map the entire BAR2 region here.
+ * But unnecessarily exposing the migration BAR region to the Guest
+ * has the potential to prevent/corrupt the Guest migration. Hence,
+ * we restrict access to the migration control space from
+ * Guest(Please see mmap/ioctl/read/write override functions).
+ *
+ * Please note that it is OK to expose the entire VF BAR if migration
+ * is not supported or required as this cannot affect the ACC PF
+ * configurations.
+ *
+ * Also the HiSilicon ACC VF devices supported by this driver on
+ * HiSilicon hardware platforms are integrated end point devices
+ * and the platform lacks the capability to perform any PCIe P2P
+ * between these devices.
+ */
+
+ vf_qm->io_base =
+ ioremap(pci_resource_start(vf_dev, VFIO_PCI_BAR2_REGION_INDEX),
+ pci_resource_len(vf_dev, VFIO_PCI_BAR2_REGION_INDEX));
+ if (!vf_qm->io_base)
+ return -EIO;
+
+ vf_qm->fun_type = QM_HW_VF;
+ vf_qm->pdev = vf_dev;
+ mutex_init(&vf_qm->mailbox_lock);
+
+ return 0;
+}
+
+static struct hisi_qm *hisi_acc_get_pf_qm(struct pci_dev *pdev)
+{
+ struct hisi_qm *pf_qm;
+ struct pci_driver *pf_driver;
+
+ if (!pdev->is_virtfn)
+ return NULL;
+
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_HUAWEI_SEC_VF:
+ pf_driver = hisi_sec_get_pf_driver();
+ break;
+ case PCI_DEVICE_ID_HUAWEI_HPRE_VF:
+ pf_driver = hisi_hpre_get_pf_driver();
+ break;
+ case PCI_DEVICE_ID_HUAWEI_ZIP_VF:
+ pf_driver = hisi_zip_get_pf_driver();
+ break;
+ default:
+ return NULL;
+ }
+
+ if (!pf_driver)
+ return NULL;
+
+ pf_qm = pci_iov_get_pf_drvdata(pdev, pf_driver);
+
+ return !IS_ERR(pf_qm) ? pf_qm : NULL;
+}
+
+static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev,
+ size_t count, loff_t *ppos,
+ size_t *new_count)
+{
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+ if (index == VFIO_PCI_BAR2_REGION_INDEX) {
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
+
+ /* Check if access is for migration control region */
+ if (pos >= end)
+ return -EINVAL;
+
+ *new_count = min(count, (size_t)(end - pos));
+ }
+
+ return 0;
+}
+
+static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+ unsigned int index;
+
+ index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+ if (index == VFIO_PCI_BAR2_REGION_INDEX) {
+ u64 req_len, pgoff, req_start;
+ resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
+
+ req_len = vma->vm_end - vma->vm_start;
+ pgoff = vma->vm_pgoff &
+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ req_start = pgoff << PAGE_SHIFT;
+
+ if (req_start + req_len > end)
+ return -EINVAL;
+ }
+
+ return vfio_pci_core_mmap(core_vdev, vma);
+}
+
+static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ size_t new_count = count;
+ int ret;
+
+ ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count);
+ if (ret)
+ return ret;
+
+ return vfio_pci_core_write(core_vdev, buf, new_count, ppos);
+}
+
+static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev,
+ char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ size_t new_count = count;
+ int ret;
+
+ ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count);
+ if (ret)
+ return ret;
+
+ return vfio_pci_core_read(core_vdev, buf, new_count, ppos);
+}
+
+static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+ unsigned long arg)
+{
+ if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+ struct pci_dev *pdev = vdev->pdev;
+ struct vfio_region_info info;
+ unsigned long minsz;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ if (info.index == VFIO_PCI_BAR2_REGION_INDEX) {
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+
+ /*
+ * ACC VF dev BAR2 region consists of both functional
+ * register space and migration control register space.
+ * Report only the functional region to Guest.
+ */
+ info.size = pci_resource_len(pdev, info.index) / 2;
+
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP;
+
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
+ }
+ }
+ return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+}
+
+static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
+ struct hisi_acc_vf_core_device, core_device.vdev);
+ struct vfio_pci_core_device *vdev = &hisi_acc_vdev->core_device;
+ int ret;
+
+ ret = vfio_pci_core_enable(vdev);
+ if (ret)
+ return ret;
+
+ if (core_vdev->mig_ops) {
+ ret = hisi_acc_vf_qm_init(hisi_acc_vdev);
+ if (ret) {
+ vfio_pci_core_disable(vdev);
+ return ret;
+ }
+ hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+ }
+
+ vfio_pci_core_finish_enable(vdev);
+ return 0;
+}
+
+static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
+ struct hisi_acc_vf_core_device, core_device.vdev);
+ struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
+
+ iounmap(vf_qm->io_base);
+ vfio_pci_core_close_device(core_vdev);
+}
+
+static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = {
+ .migration_set_state = hisi_acc_vfio_pci_set_device_state,
+ .migration_get_state = hisi_acc_vfio_pci_get_device_state,
+ .migration_get_data_size = hisi_acc_vfio_pci_get_data_size,
+};
+
+static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
+ struct hisi_acc_vf_core_device, core_device.vdev);
+ struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
+ struct hisi_qm *pf_qm = hisi_acc_get_pf_qm(pdev);
+
+ hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
+ hisi_acc_vdev->pf_qm = pf_qm;
+ hisi_acc_vdev->vf_dev = pdev;
+ mutex_init(&hisi_acc_vdev->state_mutex);
+
+ core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY;
+ core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops;
+
+ return vfio_pci_core_init_dev(core_vdev);
+}
+
+static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
+ .name = "hisi-acc-vfio-pci-migration",
+ .init = hisi_acc_vfio_pci_migrn_init_dev,
+ .release = vfio_pci_core_release_dev,
+ .open_device = hisi_acc_vfio_pci_open_device,
+ .close_device = hisi_acc_vfio_pci_close_device,
+ .ioctl = hisi_acc_vfio_pci_ioctl,
+ .device_feature = vfio_pci_core_ioctl_feature,
+ .read = hisi_acc_vfio_pci_read,
+ .write = hisi_acc_vfio_pci_write,
+ .mmap = hisi_acc_vfio_pci_mmap,
+ .request = vfio_pci_core_request,
+ .match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
+};
+
+static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
+ .name = "hisi-acc-vfio-pci",
+ .init = vfio_pci_core_init_dev,
+ .release = vfio_pci_core_release_dev,
+ .open_device = hisi_acc_vfio_pci_open_device,
+ .close_device = vfio_pci_core_close_device,
+ .ioctl = vfio_pci_core_ioctl,
+ .device_feature = vfio_pci_core_ioctl_feature,
+ .read = vfio_pci_core_read,
+ .write = vfio_pci_core_write,
+ .mmap = vfio_pci_core_mmap,
+ .request = vfio_pci_core_request,
+ .match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
+};
+
+static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev;
+ const struct vfio_device_ops *ops = &hisi_acc_vfio_pci_ops;
+ struct hisi_qm *pf_qm;
+ int vf_id;
+ int ret;
+
+ pf_qm = hisi_acc_get_pf_qm(pdev);
+ if (pf_qm && pf_qm->ver >= QM_HW_V3) {
+ vf_id = pci_iov_vf_id(pdev);
+ if (vf_id >= 0)
+ ops = &hisi_acc_vfio_pci_migrn_ops;
+ else
+ pci_warn(pdev, "migration support failed, continue with generic interface\n");
+ }
+
+ hisi_acc_vdev = vfio_alloc_device(hisi_acc_vf_core_device,
+ core_device.vdev, &pdev->dev, ops);
+ if (IS_ERR(hisi_acc_vdev))
+ return PTR_ERR(hisi_acc_vdev);
+
+ dev_set_drvdata(&pdev->dev, &hisi_acc_vdev->core_device);
+ ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
+ if (ret)
+ goto out_put_vdev;
+ return 0;
+
+out_put_vdev:
+ vfio_put_device(&hisi_acc_vdev->core_device.vdev);
+ return ret;
+}
+
+static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
+
+ vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
+ vfio_put_device(&hisi_acc_vdev->core_device.vdev);
+}
+
+static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_SEC_VF) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_HPRE_VF) },
+ { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HUAWEI_ZIP_VF) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);
+
+static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
+ .reset_done = hisi_acc_vf_pci_aer_reset_done,
+ .error_detected = vfio_pci_core_aer_err_detected,
+};
+
+static struct pci_driver hisi_acc_vfio_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = hisi_acc_vfio_pci_table,
+ .probe = hisi_acc_vfio_pci_probe,
+ .remove = hisi_acc_vfio_pci_remove,
+ .err_handler = &hisi_acc_vf_err_handlers,
+ .driver_managed_dma = true,
+};
+
+module_pci_driver(hisi_acc_vfio_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Liu Longfang <liulongfang@huawei.com>");
+MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
+MODULE_DESCRIPTION("HiSilicon VFIO PCI - VFIO PCI driver with live migration support for HiSilicon ACC device family");