From 5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 21 Feb 2023 18:24:12 -0800 Subject: Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. 
- Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. 
Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. 
- Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) 
- ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth coexistence - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload
depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ... --- drivers/crypto/marvell/octeontx/otx_cptvf_main.c | 976 +++++++++++++++++++++++ 1 file changed, 976 insertions(+) create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_main.c (limited to 'drivers/crypto/marvell/octeontx/otx_cptvf_main.c') diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c new file mode 100644 index 000000000..88a41d1ca --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -0,0 +1,976 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define DRV_NAME "octeontx-cptvf" +#define DRV_VERSION "1.0" + +static void vq_work_handler(unsigned long data) +{ + struct otx_cptvf_wqe_info *cwqe_info = + (struct otx_cptvf_wqe_info *) data; + + otx_cpt_post_process(&cwqe_info->vq_wqe[0]); +} + +static int init_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kfree_sensitive(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo) +{ + struct otx_cpt_pending_queue *queue; + int i; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kfree_sensitive((queue->head)); + queue->front = 0; + queue->rear = 0; + queue->qlen = 0; + } + pqinfo->num_queues = 0; +} + +static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen, + u32 num_queues) +{ + struct otx_cpt_pending_queue *queue = NULL; + int ret; + u32 i; + + pqinfo->num_queues = 
num_queues; + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kcalloc(qlen, sizeof(*queue->head), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->pending_count = 0; + queue->front = 0; + queue->rear = 0; + queue->qlen = qlen; + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen, + u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!num_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + return ret; + } + return 0; +} + +static void cleanup_pending_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->num_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo) +{ + struct otx_cpt_cmd_queue *queue = NULL; + struct otx_cpt_cmd_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + int i; + + /* clean up for each queue */ + for (i = 0; i < cptvf->num_queues; i++) { + queue = &cqinfo->queue[i]; + + while (!list_empty(&cqinfo->queue[i].chead)) { + chunk = list_first_entry(&cqinfo->queue[i].chead, + struct otx_cpt_cmd_chunk, nextchunk); + + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + list_del(&chunk->nextchunk); + kfree_sensitive(chunk); + } + queue->num_chunks = 0; + queue->idx = 0; + + } +} + +static int alloc_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo, + u32 qlen) +{ + struct otx_cpt_cmd_chunk *curr, *first, *last; + struct otx_cpt_cmd_queue *queue = NULL; + struct pci_dev *pdev 
= cptvf->pdev; + size_t q_size, c_size, rem_q_size; + u32 qcsize_bytes; + int i; + + + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * OTX_CPT_INST_SIZE; + + qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE; + + /* per queue initialization */ + for (i = 0; i < cptvf->num_queues; i++) { + rem_q_size = q_size; + first = NULL; + last = NULL; + + queue = &cqinfo->queue[i]; + INIT_LIST_HEAD(&queue->chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, + "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->num_chunks); + goto free_curr; + } + curr->size = c_size; + + if (queue->num_chunks == 0) { + first = curr; + queue->base = first; + } + list_add_tail(&curr->nextchunk, + &cqinfo->queue[i].chead); + + queue->num_chunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = + (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* + * Make the queue circular, tie back last chunk entry to head + */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + } + return 0; +free_curr: + kfree(curr); +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n", + cptvf->num_queues); + return ret; + } + return ret; +} + +static void 
cleanup_command_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->num_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct otx_cptvf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 max_dev_queues = 0; + int ret; + + max_dev_queues = OTX_CPT_NUM_QS_PER_VF; + /* possible cpus */ + num_queues = min_t(u32, num_queues, max_dev_queues); + cptvf->num_queues = num_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + num_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val) +{ + union otx_cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = readq(cptvf->reg_base + OTX_CPT_VQX_CTL(0)); + vqx_ctl.s.ena = val; + writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0)); +} + +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = 
readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); +} + +static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val) +{ + union otx_cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); + vqx_inprg.s.inflight = val; + writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); +} + +static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.num_wait = val; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + +static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.num_wait; +} + +static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.time_wait = time; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + + +static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.time_wait; +} + +static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable SWERR interrupts for the requested VF */ + vqx_misc_ena.s.swerr = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base 
+ OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable MBOX interrupt for the requested VF */ + vqx_misc_ena.s.mbox = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); + /* Enable DONE interrupt for the requested VF */ + vqx_done_ena.s.done = 1; + writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); +} + +static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_swerr_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + writeq(vqx_misc_int.u, 
cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf) +{ + return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq, + void *arg) +{ + struct otx_cptvf *cptvf = arg; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /* Check for MISC interrupt types */ + if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + otx_cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /* Clear doorbell count */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, + "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, + "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, + "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, + "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf, + int qno) +{ + struct otx_cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->num_queues)) + return NULL; + nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf) +{ + union 
otx_cptx_vqx_done vqx_done; + + vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf, + u32 ackcnt) +{ + union otx_cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); + vqx_dack_cnt.s.done_ack = ackcnt; + writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); +} + +static irqreturn_t cptvf_done_intr_handler(int __always_unused irq, + void *cptvf_dev) +{ + struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct otx_cptvf_wqe *wqe; + + /* + * Acknowledge the number of scheduled completions for + * processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)\n", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, + "Allocation failed for affinity_mask for VF %d\n", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val) +{ + union otx_cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0)); +} + +static void cptvf_device_init(struct otx_cptvf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + 
otx_cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD); + cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY; +} + +static ssize_t vf_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + char *msg; + + switch (cptvf->vftype) { + case OTX_CPT_AE_TYPES: + msg = "AE"; + break; + + case OTX_CPT_SE_TYPES: + msg = "SE"; + break; + + default: + msg = "Invalid"; + } + + return sysfs_emit(buf, "%s\n", msg); +} + +static ssize_t vf_engine_group_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", cptvf->vfgrp); +} + +static ssize_t vf_engine_group_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + int val, ret; + + ret = kstrtoint(buf, 10, &val); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + if (val >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Engine group >= than max available groups %d\n", + OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val); + if (ret) + return ret; + + return count; +} + +static ssize_t vf_coalesc_time_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + cptvf_read_vq_done_timewait(cptvf)); +} + +static ssize_t vf_coalesc_num_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + 
struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + cptvf_read_vq_done_numwait(cptvf)); +} + +static ssize_t vf_coalesc_time_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_TIME_WAIT || + val > OTX_CPT_COALESC_MAX_TIME_WAIT) + return -EINVAL; + + cptvf_write_vq_done_timewait(cptvf, val); + return count; +} + +static ssize_t vf_coalesc_num_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_NUM_WAIT || + val > OTX_CPT_COALESC_MAX_NUM_WAIT) + return -EINVAL; + + cptvf_write_vq_done_numwait(cptvf, val); + return count; +} + +static DEVICE_ATTR_RO(vf_type); +static DEVICE_ATTR_RW(vf_engine_group); +static DEVICE_ATTR_RW(vf_coalesc_time_wait); +static DEVICE_ATTR_RW(vf_coalesc_num_wait); + +static struct attribute *otx_cptvf_attrs[] = { + &dev_attr_vf_type.attr, + &dev_attr_vf_engine_group.attr, + &dev_attr_vf_coalesc_time_wait.attr, + &dev_attr_vf_coalesc_num_wait.attr, + NULL +}; + +static const struct attribute_group otx_cptvf_sysfs_group = { + .attrs = otx_cptvf_attrs, +}; + +static int otx_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cptvf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 
0x%x\n", err); + goto disable_device; + } + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); + goto release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS, + OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + OTX_CPT_VF_MSIX_VECTORS); + goto unmap_region; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request misc irq\n"); + goto free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check cpt pf status, gets chip ID / device Id from PF if ready */ + err = otx_cptvf_check_pf_ready(cptvf); + if (err) + goto free_misc_irq; + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed\n"); + goto free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = otx_cptvf_send_vq_size_msg(cptvf); + if (err) + goto sw_cleanup; + + /* CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign currnet Q to required group */ + err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp); + if (err) + goto sw_cleanup; + + cptvf->priority = 1; + err = otx_cptvf_send_vf_priority_msg(cptvf); + if (err) + goto sw_cleanup; + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", 
+ cptvf); + if (err) { + dev_err(dev, "Failed to request done irq\n"); + goto free_done_irq; + } + + /* Enable done interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = otx_cptvf_send_vf_up(cptvf); + if (err) + goto free_irq_affinity; + + /* Initialize algorithms and set ops */ + err = otx_cpt_crypto_init(pdev, THIS_MODULE, + cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE, + cptvf->vftype, 1, cptvf->num_vfs); + if (err) { + dev_err(dev, "Failed to register crypto algs\n"); + goto free_irq_affinity; + } + + err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group); + if (err) { + dev_err(dev, "Creating sysfs entries failed\n"); + goto crypto_exit; + } + + return 0; + +crypto_exit: + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); +free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +free_done_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); +sw_cleanup: + cptvf_sw_cleanup(cptvf); +free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +free_vectors: + pci_free_irq_vectors(cptvf->pdev); +unmap_region: + pci_iounmap(pdev, cptvf->reg_base); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cptvf_remove(struct pci_dev *pdev) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (otx_cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg\n"); + } else { + sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group); + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); + cptvf_free_irq_affinity(cptvf, 
CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + cptvf_sw_cleanup(cptvf); + pci_free_irq_vectors(cptvf->pdev); + pci_iounmap(pdev, cptvf->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +/* Supported devices */ +static const struct pci_device_id otx_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cptvf_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cptvf_id_table, + .probe = otx_cptvf_probe, + .remove = otx_cptvf_remove, +}; + +module_pci_driver(otx_cptvf_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table); -- cgit v1.2.3