aboutsummaryrefslogtreecommitdiff
path: root/drivers/vdpa/vdpa_sim/vdpa_sim.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /drivers/vdpa/vdpa_sim/vdpa_sim.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'drivers/vdpa/vdpa_sim/vdpa_sim.c')
-rw-r--r--drivers/vdpa/vdpa_sim/vdpa_sim.c767
1 files changed, 767 insertions, 0 deletions
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
new file mode 100644
index 000000000..cb88891b4
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA device simulator core.
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/dma-map-ops.h>
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <linux/vhost_iotlb.h>
+#include <linux/iova.h>
+#include <uapi/linux/vdpa.h>
+
+#include "vdpa_sim.h"
+
+#define DRV_VERSION "0.1"
+#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
+#define DRV_DESC "vDPA Device Simulator core"
+#define DRV_LICENSE "GPL v2"
+
+static int batch_mapping = 1;
+module_param(batch_mapping, int, 0444);
+MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
+
+static int max_iotlb_entries = 2048;
+module_param(max_iotlb_entries, int, 0444);
+MODULE_PARM_DESC(max_iotlb_entries,
+ "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
+
+#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
+#define VDPASIM_QUEUE_MAX 256
+#define VDPASIM_VENDOR_ID 0
+
+static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
+{
+ return container_of(vdpa, struct vdpasim, vdpa);
+}
+
+static struct vdpasim *dev_to_sim(struct device *dev)
+{
+ struct vdpa_device *vdpa = dev_to_vdpa(dev);
+
+ return vdpa_to_sim(vdpa);
+}
+
+static void vdpasim_vq_notify(struct vringh *vring)
+{
+ struct vdpasim_virtqueue *vq =
+ container_of(vring, struct vdpasim_virtqueue, vring);
+
+ if (!vq->cb)
+ return;
+
+ vq->cb(vq->private);
+}
+
+static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
+{
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
+ (struct vring_desc *)(uintptr_t)vq->desc_addr,
+ (struct vring_avail *)
+ (uintptr_t)vq->driver_addr,
+ (struct vring_used *)
+ (uintptr_t)vq->device_addr);
+
+ vq->vring.notify = vdpasim_vq_notify;
+}
+
+static void vdpasim_vq_reset(struct vdpasim *vdpasim,
+ struct vdpasim_virtqueue *vq)
+{
+ vq->ready = false;
+ vq->desc_addr = 0;
+ vq->driver_addr = 0;
+ vq->device_addr = 0;
+ vq->cb = NULL;
+ vq->private = NULL;
+ vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
+ VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+
+ vq->vring.notify = NULL;
+}
+
+static void vdpasim_do_reset(struct vdpasim *vdpasim)
+{
+ int i;
+
+ spin_lock(&vdpasim->iommu_lock);
+
+ for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
+ vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]);
+ vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
+ &vdpasim->iommu_lock);
+ }
+
+ for (i = 0; i < vdpasim->dev_attr.nas; i++)
+ vhost_iotlb_reset(&vdpasim->iommu[i]);
+
+ vdpasim->running = true;
+ spin_unlock(&vdpasim->iommu_lock);
+
+ vdpasim->features = 0;
+ vdpasim->status = 0;
+ ++vdpasim->generation;
+}
+
+static int dir_to_perm(enum dma_data_direction dir)
+{
+ int perm = -EFAULT;
+
+ switch (dir) {
+ case DMA_FROM_DEVICE:
+ perm = VHOST_MAP_WO;
+ break;
+ case DMA_TO_DEVICE:
+ perm = VHOST_MAP_RO;
+ break;
+ case DMA_BIDIRECTIONAL:
+ perm = VHOST_MAP_RW;
+ break;
+ default:
+ break;
+ }
+
+ return perm;
+}
+
+static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
+ size_t size, unsigned int perm)
+{
+ struct iova *iova;
+ dma_addr_t dma_addr;
+ int ret;
+
+ /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
+ iova = alloc_iova(&vdpasim->iova, size >> iova_shift(&vdpasim->iova),
+ ULONG_MAX - 1, true);
+ if (!iova)
+ return DMA_MAPPING_ERROR;
+
+ dma_addr = iova_dma_addr(&vdpasim->iova, iova);
+
+ spin_lock(&vdpasim->iommu_lock);
+ ret = vhost_iotlb_add_range(&vdpasim->iommu[0], (u64)dma_addr,
+ (u64)dma_addr + size - 1, (u64)paddr, perm);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ if (ret) {
+ __free_iova(&vdpasim->iova, iova);
+ return DMA_MAPPING_ERROR;
+ }
+
+ return dma_addr;
+}
+
+static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
+ size_t size)
+{
+ spin_lock(&vdpasim->iommu_lock);
+ vhost_iotlb_del_range(&vdpasim->iommu[0], (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ free_iova(&vdpasim->iova, iova_pfn(&vdpasim->iova, dma_addr));
+}
+
+static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+ phys_addr_t paddr = page_to_phys(page) + offset;
+ int perm = dir_to_perm(dir);
+
+ if (perm < 0)
+ return DMA_MAPPING_ERROR;
+
+ return vdpasim_map_range(vdpasim, paddr, size, perm);
+}
+
+static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+
+ vdpasim_unmap_range(vdpasim, dma_addr, size);
+}
+
+static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_addr, gfp_t flag,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+ phys_addr_t paddr;
+ void *addr;
+
+ addr = kmalloc(size, flag);
+ if (!addr) {
+ *dma_addr = DMA_MAPPING_ERROR;
+ return NULL;
+ }
+
+ paddr = virt_to_phys(addr);
+
+ *dma_addr = vdpasim_map_range(vdpasim, paddr, size, VHOST_MAP_RW);
+ if (*dma_addr == DMA_MAPPING_ERROR) {
+ kfree(addr);
+ return NULL;
+ }
+
+ return addr;
+}
+
+static void vdpasim_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+ struct vdpasim *vdpasim = dev_to_sim(dev);
+
+ vdpasim_unmap_range(vdpasim, dma_addr, size);
+
+ kfree(vaddr);
+}
+
+static const struct dma_map_ops vdpasim_dma_ops = {
+ .map_page = vdpasim_map_page,
+ .unmap_page = vdpasim_unmap_page,
+ .alloc = vdpasim_alloc_coherent,
+ .free = vdpasim_free_coherent,
+};
+
+static const struct vdpa_config_ops vdpasim_config_ops;
+static const struct vdpa_config_ops vdpasim_batch_config_ops;
+
+struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
+ const struct vdpa_dev_set_config *config)
+{
+ const struct vdpa_config_ops *ops;
+ struct vdpasim *vdpasim;
+ struct device *dev;
+ int i, ret = -ENOMEM;
+
+ if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
+ if (config->device_features &
+ ~dev_attr->supported_features)
+ return ERR_PTR(-EINVAL);
+ dev_attr->supported_features =
+ config->device_features;
+ }
+
+ if (batch_mapping)
+ ops = &vdpasim_batch_config_ops;
+ else
+ ops = &vdpasim_config_ops;
+
+ vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
+ dev_attr->ngroups, dev_attr->nas,
+ dev_attr->name, false);
+ if (IS_ERR(vdpasim)) {
+ ret = PTR_ERR(vdpasim);
+ goto err_alloc;
+ }
+
+ vdpasim->dev_attr = *dev_attr;
+ INIT_WORK(&vdpasim->work, dev_attr->work_fn);
+ spin_lock_init(&vdpasim->lock);
+ spin_lock_init(&vdpasim->iommu_lock);
+
+ dev = &vdpasim->vdpa.dev;
+ dev->dma_mask = &dev->coherent_dma_mask;
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+ goto err_iommu;
+ set_dma_ops(dev, &vdpasim_dma_ops);
+ vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
+
+ vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
+ if (!vdpasim->config)
+ goto err_iommu;
+
+ vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue),
+ GFP_KERNEL);
+ if (!vdpasim->vqs)
+ goto err_iommu;
+
+ vdpasim->iommu = kmalloc_array(vdpasim->dev_attr.nas,
+ sizeof(*vdpasim->iommu), GFP_KERNEL);
+ if (!vdpasim->iommu)
+ goto err_iommu;
+
+ for (i = 0; i < vdpasim->dev_attr.nas; i++)
+ vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
+
+ vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
+ if (!vdpasim->buffer)
+ goto err_iommu;
+
+ for (i = 0; i < dev_attr->nvqs; i++)
+ vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
+ &vdpasim->iommu_lock);
+
+ ret = iova_cache_get();
+ if (ret)
+ goto err_iommu;
+
+ /* For simplicity we use an IOVA allocator with byte granularity */
+ init_iova_domain(&vdpasim->iova, 1, 0);
+
+ vdpasim->vdpa.dma_dev = dev;
+
+ return vdpasim;
+
+err_iommu:
+ put_device(dev);
+err_alloc:
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(vdpasim_create);
+
+static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
+ u64 desc_area, u64 driver_area,
+ u64 device_area)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vq->desc_addr = desc_area;
+ vq->driver_addr = driver_area;
+ vq->device_addr = device_area;
+
+ return 0;
+}
+
+static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vq->num = num;
+}
+
+static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ if (vq->ready)
+ schedule_work(&vdpasim->work);
+}
+
+static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
+ struct vdpa_callback *cb)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ vq->cb = cb->callback;
+ vq->private = cb->private;
+}
+
+static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ bool old_ready;
+
+ spin_lock(&vdpasim->lock);
+ old_ready = vq->ready;
+ vq->ready = ready;
+ if (vq->ready && !old_ready) {
+ vdpasim_queue_ready(vdpasim, idx);
+ }
+ spin_unlock(&vdpasim->lock);
+}
+
+static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+
+ return vq->ready;
+}
+
+static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx,
+ const struct vdpa_vq_state *state)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ struct vringh *vrh = &vq->vring;
+
+ spin_lock(&vdpasim->lock);
+ vrh->last_avail_idx = state->split.avail_index;
+ spin_unlock(&vdpasim->lock);
+
+ return 0;
+}
+
+static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,
+ struct vdpa_vq_state *state)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ struct vringh *vrh = &vq->vring;
+
+ state->split.avail_index = vrh->last_avail_idx;
+ return 0;
+}
+
+static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
+{
+ return VDPASIM_QUEUE_ALIGN;
+}
+
+static u32 vdpasim_get_vq_group(struct vdpa_device *vdpa, u16 idx)
+{
+ /* RX and TX belongs to group 0, CVQ belongs to group 1 */
+ if (idx == 2)
+ return 1;
+ else
+ return 0;
+}
+
+static u64 vdpasim_get_device_features(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.supported_features;
+}
+
+static int vdpasim_set_driver_features(struct vdpa_device *vdpa, u64 features)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ /* DMA mapping must be done by driver */
+ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
+ return -EINVAL;
+
+ vdpasim->features = features & vdpasim->dev_attr.supported_features;
+
+ return 0;
+}
+
+static u64 vdpasim_get_driver_features(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->features;
+}
+
+static void vdpasim_set_config_cb(struct vdpa_device *vdpa,
+ struct vdpa_callback *cb)
+{
+ /* We don't support config interrupt */
+}
+
+static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa)
+{
+ return VDPASIM_QUEUE_MAX;
+}
+
+static u32 vdpasim_get_device_id(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.id;
+}
+
+static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa)
+{
+ return VDPASIM_VENDOR_ID;
+}
+
+static u8 vdpasim_get_status(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ u8 status;
+
+ spin_lock(&vdpasim->lock);
+ status = vdpasim->status;
+ spin_unlock(&vdpasim->lock);
+
+ return status;
+}
+
+static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ spin_lock(&vdpasim->lock);
+ vdpasim->status = status;
+ spin_unlock(&vdpasim->lock);
+}
+
+static int vdpasim_reset(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ spin_lock(&vdpasim->lock);
+ vdpasim->status = 0;
+ vdpasim_do_reset(vdpasim);
+ spin_unlock(&vdpasim->lock);
+
+ return 0;
+}
+
+static int vdpasim_suspend(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ spin_lock(&vdpasim->lock);
+ vdpasim->running = false;
+ spin_unlock(&vdpasim->lock);
+
+ return 0;
+}
+
+static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.config_size;
+}
+
+static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ if (offset + len > vdpasim->dev_attr.config_size)
+ return;
+
+ if (vdpasim->dev_attr.get_config)
+ vdpasim->dev_attr.get_config(vdpasim, vdpasim->config);
+
+ memcpy(buf, vdpasim->config + offset, len);
+}
+
+static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
+ const void *buf, unsigned int len)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ if (offset + len > vdpasim->dev_attr.config_size)
+ return;
+
+ memcpy(vdpasim->config + offset, buf, len);
+
+ if (vdpasim->dev_attr.set_config)
+ vdpasim->dev_attr.set_config(vdpasim, vdpasim->config);
+}
+
+static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->generation;
+}
+
+static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa)
+{
+ struct vdpa_iova_range range = {
+ .first = 0ULL,
+ .last = ULLONG_MAX,
+ };
+
+ return range;
+}
+
+static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
+ unsigned int asid)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vhost_iotlb *iommu;
+ int i;
+
+ if (group > vdpasim->dev_attr.ngroups)
+ return -EINVAL;
+
+ if (asid >= vdpasim->dev_attr.nas)
+ return -EINVAL;
+
+ iommu = &vdpasim->iommu[asid];
+
+ spin_lock(&vdpasim->lock);
+
+ for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
+ if (vdpasim_get_vq_group(vdpa, i) == group)
+ vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu,
+ &vdpasim->iommu_lock);
+
+ spin_unlock(&vdpasim->lock);
+
+ return 0;
+}
+
+static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
+ struct vhost_iotlb *iotlb)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vhost_iotlb_map *map;
+ struct vhost_iotlb *iommu;
+ u64 start = 0ULL, last = 0ULL - 1;
+ int ret;
+
+ if (asid >= vdpasim->dev_attr.nas)
+ return -EINVAL;
+
+ spin_lock(&vdpasim->iommu_lock);
+
+ iommu = &vdpasim->iommu[asid];
+ vhost_iotlb_reset(iommu);
+
+ for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
+ map = vhost_iotlb_itree_next(map, start, last)) {
+ ret = vhost_iotlb_add_range(iommu, map->start,
+ map->last, map->addr, map->perm);
+ if (ret)
+ goto err;
+ }
+ spin_unlock(&vdpasim->iommu_lock);
+ return 0;
+
+err:
+ vhost_iotlb_reset(iommu);
+ spin_unlock(&vdpasim->iommu_lock);
+ return ret;
+}
+
+static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
+ u64 iova, u64 size,
+ u64 pa, u32 perm, void *opaque)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ int ret;
+
+ if (asid >= vdpasim->dev_attr.nas)
+ return -EINVAL;
+
+ spin_lock(&vdpasim->iommu_lock);
+ ret = vhost_iotlb_add_range_ctx(&vdpasim->iommu[asid], iova,
+ iova + size - 1, pa, perm, opaque);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ return ret;
+}
+
+static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
+ u64 iova, u64 size)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ if (asid >= vdpasim->dev_attr.nas)
+ return -EINVAL;
+
+ spin_lock(&vdpasim->iommu_lock);
+ vhost_iotlb_del_range(&vdpasim->iommu[asid], iova, iova + size - 1);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ return 0;
+}
+
+static void vdpasim_free(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ int i;
+
+ cancel_work_sync(&vdpasim->work);
+
+ for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
+ vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov);
+ vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
+ }
+
+ if (vdpa_get_dma_dev(vdpa)) {
+ put_iova_domain(&vdpasim->iova);
+ iova_cache_put();
+ }
+
+ kvfree(vdpasim->buffer);
+ for (i = 0; i < vdpasim->dev_attr.nas; i++)
+ vhost_iotlb_reset(&vdpasim->iommu[i]);
+ kfree(vdpasim->iommu);
+ kfree(vdpasim->vqs);
+ kfree(vdpasim->config);
+}
+
+static const struct vdpa_config_ops vdpasim_config_ops = {
+ .set_vq_address = vdpasim_set_vq_address,
+ .set_vq_num = vdpasim_set_vq_num,
+ .kick_vq = vdpasim_kick_vq,
+ .set_vq_cb = vdpasim_set_vq_cb,
+ .set_vq_ready = vdpasim_set_vq_ready,
+ .get_vq_ready = vdpasim_get_vq_ready,
+ .set_vq_state = vdpasim_set_vq_state,
+ .get_vq_state = vdpasim_get_vq_state,
+ .get_vq_align = vdpasim_get_vq_align,
+ .get_vq_group = vdpasim_get_vq_group,
+ .get_device_features = vdpasim_get_device_features,
+ .set_driver_features = vdpasim_set_driver_features,
+ .get_driver_features = vdpasim_get_driver_features,
+ .set_config_cb = vdpasim_set_config_cb,
+ .get_vq_num_max = vdpasim_get_vq_num_max,
+ .get_device_id = vdpasim_get_device_id,
+ .get_vendor_id = vdpasim_get_vendor_id,
+ .get_status = vdpasim_get_status,
+ .set_status = vdpasim_set_status,
+ .reset = vdpasim_reset,
+ .suspend = vdpasim_suspend,
+ .get_config_size = vdpasim_get_config_size,
+ .get_config = vdpasim_get_config,
+ .set_config = vdpasim_set_config,
+ .get_generation = vdpasim_get_generation,
+ .get_iova_range = vdpasim_get_iova_range,
+ .set_group_asid = vdpasim_set_group_asid,
+ .dma_map = vdpasim_dma_map,
+ .dma_unmap = vdpasim_dma_unmap,
+ .free = vdpasim_free,
+};
+
+static const struct vdpa_config_ops vdpasim_batch_config_ops = {
+ .set_vq_address = vdpasim_set_vq_address,
+ .set_vq_num = vdpasim_set_vq_num,
+ .kick_vq = vdpasim_kick_vq,
+ .set_vq_cb = vdpasim_set_vq_cb,
+ .set_vq_ready = vdpasim_set_vq_ready,
+ .get_vq_ready = vdpasim_get_vq_ready,
+ .set_vq_state = vdpasim_set_vq_state,
+ .get_vq_state = vdpasim_get_vq_state,
+ .get_vq_align = vdpasim_get_vq_align,
+ .get_vq_group = vdpasim_get_vq_group,
+ .get_device_features = vdpasim_get_device_features,
+ .set_driver_features = vdpasim_set_driver_features,
+ .get_driver_features = vdpasim_get_driver_features,
+ .set_config_cb = vdpasim_set_config_cb,
+ .get_vq_num_max = vdpasim_get_vq_num_max,
+ .get_device_id = vdpasim_get_device_id,
+ .get_vendor_id = vdpasim_get_vendor_id,
+ .get_status = vdpasim_get_status,
+ .set_status = vdpasim_set_status,
+ .reset = vdpasim_reset,
+ .suspend = vdpasim_suspend,
+ .get_config_size = vdpasim_get_config_size,
+ .get_config = vdpasim_get_config,
+ .set_config = vdpasim_set_config,
+ .get_generation = vdpasim_get_generation,
+ .get_iova_range = vdpasim_get_iova_range,
+ .set_group_asid = vdpasim_set_group_asid,
+ .set_map = vdpasim_set_map,
+ .free = vdpasim_free,
+};
+
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);