aboutsummaryrefslogtreecommitdiff
path: root/drivers/virtio/virtio_pci_modern.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /drivers/virtio/virtio_pci_modern.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'drivers/virtio/virtio_pci_modern.c')
-rw-r--r--drivers/virtio/virtio_pci_modern.c556
1 files changed, 556 insertions, 0 deletions
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
new file mode 100644
index 000000000..9e496e288
--- /dev/null
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio PCI driver - modern (virtio 1.0) device support
+ *
+ * This module allows virtio devices to be used over a virtual PCI device.
+ * This can be used with QEMU based VMMs like KVM or Xen.
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Rusty Russell <rusty@rustcorp.com.au>
+ * Michael S. Tsirkin <mst@redhat.com>
+ */
+
+#include <linux/delay.h>
+#define VIRTIO_PCI_NO_LEGACY
+#define VIRTIO_RING_NO_LEGACY
+#include "virtio_pci_common.h"
+
+static u64 vp_get_features(struct virtio_device *vdev)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+ return vp_modern_get_features(&vp_dev->mdev);
+}
+
+static void vp_transport_features(struct virtio_device *vdev, u64 features)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct pci_dev *pci_dev = vp_dev->pci_dev;
+
+ if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
+ pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
+ __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
+
+ if (features & BIT_ULL(VIRTIO_F_RING_RESET))
+ __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
+}
+
+/* virtio config->finalize_features() implementation */
+static int vp_finalize_features(struct virtio_device *vdev)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ u64 features = vdev->features;
+
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
+ /* Give virtio_pci a chance to accept features. */
+ vp_transport_features(vdev, features);
+
+ if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
+ dev_err(&vdev->dev, "virtio: device uses modern interface "
+ "but does not have VIRTIO_F_VERSION_1\n");
+ return -EINVAL;
+ }
+
+ vp_modern_set_features(&vp_dev->mdev, vdev->features);
+
+ return 0;
+}
+
+/* virtio config->get() implementation */
+static void vp_get(struct virtio_device *vdev, unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ void __iomem *device = mdev->device;
+ u8 b;
+ __le16 w;
+ __le32 l;
+
+ BUG_ON(offset + len > mdev->device_len);
+
+ switch (len) {
+ case 1:
+ b = ioread8(device + offset);
+ memcpy(buf, &b, sizeof b);
+ break;
+ case 2:
+ w = cpu_to_le16(ioread16(device + offset));
+ memcpy(buf, &w, sizeof w);
+ break;
+ case 4:
+ l = cpu_to_le32(ioread32(device + offset));
+ memcpy(buf, &l, sizeof l);
+ break;
+ case 8:
+ l = cpu_to_le32(ioread32(device + offset));
+ memcpy(buf, &l, sizeof l);
+ l = cpu_to_le32(ioread32(device + offset + sizeof l));
+ memcpy(buf + sizeof l, &l, sizeof l);
+ break;
+ default:
+ BUG();
+ }
+}
+
+/* the config->set() implementation. it's symmetric to the config->get()
+ * implementation */
+static void vp_set(struct virtio_device *vdev, unsigned int offset,
+ const void *buf, unsigned int len)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ void __iomem *device = mdev->device;
+ u8 b;
+ __le16 w;
+ __le32 l;
+
+ BUG_ON(offset + len > mdev->device_len);
+
+ switch (len) {
+ case 1:
+ memcpy(&b, buf, sizeof b);
+ iowrite8(b, device + offset);
+ break;
+ case 2:
+ memcpy(&w, buf, sizeof w);
+ iowrite16(le16_to_cpu(w), device + offset);
+ break;
+ case 4:
+ memcpy(&l, buf, sizeof l);
+ iowrite32(le32_to_cpu(l), device + offset);
+ break;
+ case 8:
+ memcpy(&l, buf, sizeof l);
+ iowrite32(le32_to_cpu(l), device + offset);
+ memcpy(&l, buf + sizeof l, sizeof l);
+ iowrite32(le32_to_cpu(l), device + offset + sizeof l);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static u32 vp_generation(struct virtio_device *vdev)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+ return vp_modern_generation(&vp_dev->mdev);
+}
+
+/* config->{get,set}_status() implementations */
+static u8 vp_get_status(struct virtio_device *vdev)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+ return vp_modern_get_status(&vp_dev->mdev);
+}
+
+static void vp_set_status(struct virtio_device *vdev, u8 status)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+ /* We should never be setting status to 0. */
+ BUG_ON(status == 0);
+ vp_modern_set_status(&vp_dev->mdev, status);
+}
+
+static void vp_reset(struct virtio_device *vdev)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+
+ /* 0 status means a reset. */
+ vp_modern_set_status(mdev, 0);
+ /* After writing 0 to device_status, the driver MUST wait for a read of
+ * device_status to return 0 before reinitializing the device.
+ * This will flush out the status write, and flush in device writes,
+ * including MSI-X interrupts, if any.
+ */
+ while (vp_modern_get_status(mdev))
+ msleep(1);
+ /* Flush pending VQ/configuration callbacks. */
+ vp_synchronize_vectors(vdev);
+}
+
+static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ unsigned long index;
+
+ index = vq->index;
+
+ /* activate the queue */
+ vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
+ vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
+ virtqueue_get_avail_addr(vq),
+ virtqueue_get_used_addr(vq));
+
+ if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
+ msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
+ if (msix_vec == VIRTIO_MSI_NO_VECTOR)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ struct virtio_pci_vq_info *info;
+ unsigned long flags;
+
+ if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
+ return -ENOENT;
+
+ vp_modern_set_queue_reset(mdev, vq->index);
+
+ info = vp_dev->vqs[vq->index];
+
+ /* delete vq from irq handler */
+ spin_lock_irqsave(&vp_dev->lock, flags);
+ list_del(&info->node);
+ spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+ INIT_LIST_HEAD(&info->node);
+
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
+ __virtqueue_break(vq);
+#endif
+
+ /* For the case where vq has an exclusive irq, call synchronize_irq() to
+ * wait for completion.
+ *
+ * note: We can't use disable_irq() since it conflicts with the affinity
+ * managed IRQ that is used by some drivers.
+ */
+ if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
+ synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
+
+ vq->reset = true;
+
+ return 0;
+}
+
+static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ struct virtio_pci_vq_info *info;
+ unsigned long flags, index;
+ int err;
+
+ if (!vq->reset)
+ return -EBUSY;
+
+ index = vq->index;
+ info = vp_dev->vqs[index];
+
+ if (vp_modern_get_queue_reset(mdev, index))
+ return -EBUSY;
+
+ if (vp_modern_get_queue_enable(mdev, index))
+ return -EBUSY;
+
+ err = vp_active_vq(vq, info->msix_vector);
+ if (err)
+ return err;
+
+ if (vq->callback) {
+ spin_lock_irqsave(&vp_dev->lock, flags);
+ list_add(&info->node, &vp_dev->virtqueues);
+ spin_unlock_irqrestore(&vp_dev->lock, flags);
+ } else {
+ INIT_LIST_HEAD(&info->node);
+ }
+
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
+ __virtqueue_unbreak(vq);
+#endif
+
+ vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
+ vq->reset = false;
+
+ return 0;
+}
+
+static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
+{
+ return vp_modern_config_vector(&vp_dev->mdev, vector);
+}
+
+static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+ struct virtio_pci_vq_info *info,
+ unsigned int index,
+ void (*callback)(struct virtqueue *vq),
+ const char *name,
+ bool ctx,
+ u16 msix_vec)
+{
+
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ struct virtqueue *vq;
+ u16 num;
+ int err;
+
+ if (index >= vp_modern_get_num_queues(mdev))
+ return ERR_PTR(-EINVAL);
+
+ /* Check if queue is either not available or already active. */
+ num = vp_modern_get_queue_size(mdev, index);
+ if (!num || vp_modern_get_queue_enable(mdev, index))
+ return ERR_PTR(-ENOENT);
+
+ if (!is_power_of_2(num)) {
+ dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
+ return ERR_PTR(-EINVAL);
+ }
+
+ info->msix_vector = msix_vec;
+
+ /* create the vring */
+ vq = vring_create_virtqueue(index, num,
+ SMP_CACHE_BYTES, &vp_dev->vdev,
+ true, true, ctx,
+ vp_notify, callback, name);
+ if (!vq)
+ return ERR_PTR(-ENOMEM);
+
+ vq->num_max = num;
+
+ err = vp_active_vq(vq, msix_vec);
+ if (err)
+ goto err;
+
+ vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
+ if (!vq->priv) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ return vq;
+
+err:
+ vring_del_virtqueue(vq);
+ return ERR_PTR(err);
+}
+
+static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct virtqueue *vq;
+ int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
+
+ if (rc)
+ return rc;
+
+ /* Select and activate all queues. Has to be done last: once we do
+ * this, there's no way to go back except reset.
+ */
+ list_for_each_entry(vq, &vdev->vqs, list)
+ vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true);
+
+ return 0;
+}
+
+static void del_vq(struct virtio_pci_vq_info *info)
+{
+ struct virtqueue *vq = info->vq;
+ struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+
+ if (vp_dev->msix_enabled)
+ vp_modern_queue_vector(mdev, vq->index,
+ VIRTIO_MSI_NO_VECTOR);
+
+ if (!mdev->notify_base)
+ pci_iounmap(mdev->pci_dev, (void __force __iomem *)vq->priv);
+
+ vring_del_virtqueue(vq);
+}
+
+static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id,
+ u8 *bar, u64 *offset, u64 *len)
+{
+ int pos;
+
+ for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0;
+ pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+ u8 type, cap_len, id, res_bar;
+ u32 tmp32;
+ u64 res_offset, res_length;
+
+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+ cfg_type), &type);
+ if (type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)
+ continue;
+
+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+ cap_len), &cap_len);
+ if (cap_len != sizeof(struct virtio_pci_cap64)) {
+ dev_err(&dev->dev, "%s: shm cap with bad size offset:"
+ " %d size: %d\n", __func__, pos, cap_len);
+ continue;
+ }
+
+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+ id), &id);
+ if (id != required_id)
+ continue;
+
+ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+ bar), &res_bar);
+ if (res_bar >= PCI_STD_NUM_BARS)
+ continue;
+
+ /* Type and ID match, and the BAR value isn't reserved.
+ * Looks good.
+ */
+
+ /* Read the lower 32bit of length and offset */
+ pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap,
+ offset), &tmp32);
+ res_offset = tmp32;
+ pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap,
+ length), &tmp32);
+ res_length = tmp32;
+
+ /* and now the top half */
+ pci_read_config_dword(dev,
+ pos + offsetof(struct virtio_pci_cap64,
+ offset_hi), &tmp32);
+ res_offset |= ((u64)tmp32) << 32;
+ pci_read_config_dword(dev,
+ pos + offsetof(struct virtio_pci_cap64,
+ length_hi), &tmp32);
+ res_length |= ((u64)tmp32) << 32;
+
+ *bar = res_bar;
+ *offset = res_offset;
+ *len = res_length;
+
+ return pos;
+ }
+ return 0;
+}
+
+static bool vp_get_shm_region(struct virtio_device *vdev,
+ struct virtio_shm_region *region, u8 id)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct pci_dev *pci_dev = vp_dev->pci_dev;
+ u8 bar;
+ u64 offset, len;
+ phys_addr_t phys_addr;
+ size_t bar_len;
+
+ if (!virtio_pci_find_shm_cap(pci_dev, id, &bar, &offset, &len))
+ return false;
+
+ phys_addr = pci_resource_start(pci_dev, bar);
+ bar_len = pci_resource_len(pci_dev, bar);
+
+ if ((offset + len) < offset) {
+ dev_err(&pci_dev->dev, "%s: cap offset+len overflow detected\n",
+ __func__);
+ return false;
+ }
+
+ if (offset + len > bar_len) {
+ dev_err(&pci_dev->dev, "%s: bar shorter than cap offset+len\n",
+ __func__);
+ return false;
+ }
+
+ region->len = len;
+ region->addr = (u64) phys_addr + offset;
+
+ return true;
+}
+
+static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
+ .get = NULL,
+ .set = NULL,
+ .generation = vp_generation,
+ .get_status = vp_get_status,
+ .set_status = vp_set_status,
+ .reset = vp_reset,
+ .find_vqs = vp_modern_find_vqs,
+ .del_vqs = vp_del_vqs,
+ .synchronize_cbs = vp_synchronize_vectors,
+ .get_features = vp_get_features,
+ .finalize_features = vp_finalize_features,
+ .bus_name = vp_bus_name,
+ .set_vq_affinity = vp_set_vq_affinity,
+ .get_vq_affinity = vp_get_vq_affinity,
+ .get_shm_region = vp_get_shm_region,
+ .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
+ .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
+};
+
+static const struct virtio_config_ops virtio_pci_config_ops = {
+ .get = vp_get,
+ .set = vp_set,
+ .generation = vp_generation,
+ .get_status = vp_get_status,
+ .set_status = vp_set_status,
+ .reset = vp_reset,
+ .find_vqs = vp_modern_find_vqs,
+ .del_vqs = vp_del_vqs,
+ .synchronize_cbs = vp_synchronize_vectors,
+ .get_features = vp_get_features,
+ .finalize_features = vp_finalize_features,
+ .bus_name = vp_bus_name,
+ .set_vq_affinity = vp_set_vq_affinity,
+ .get_vq_affinity = vp_get_vq_affinity,
+ .get_shm_region = vp_get_shm_region,
+ .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
+ .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
+};
+
+/* the PCI probing function */
+int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
+{
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+ struct pci_dev *pci_dev = vp_dev->pci_dev;
+ int err;
+
+ mdev->pci_dev = pci_dev;
+
+ err = vp_modern_probe(mdev);
+ if (err)
+ return err;
+
+ if (mdev->device)
+ vp_dev->vdev.config = &virtio_pci_config_ops;
+ else
+ vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
+
+ vp_dev->config_vector = vp_config_vector;
+ vp_dev->setup_vq = setup_vq;
+ vp_dev->del_vq = del_vq;
+ vp_dev->isr = mdev->isr;
+ vp_dev->vdev.id = mdev->id;
+
+ return 0;
+}
+
+void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
+{
+ struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+
+ vp_modern_remove(mdev);
+}