From 5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 21 Feb 2023 18:24:12 -0800
Subject: Merge tag 'net-next-6.3' of
 git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from Jakub Kicinski:
 "Core:

   - Add dedicated kmem_cache for typical/small skb->head, avoid having
     to access struct page at kfree time, and improve memory use.

   - Introduce sysctl to set default RPS configuration for new netdevs.

   - Define Netlink protocol specification format which can be used to
     describe messages used by each family and auto-generate parsers.
     Add tools for generating kernel data structures and uAPI headers.

   - Expose all net/core sysctls inside netns.

   - Remove 4s sleep in netpoll if carrier is instantly detected on
     boot.

   - Add configurable limit of MDB entries per port, and port-vlan.

   - Continue populating drop reasons throughout the stack.

   - Retire a handful of legacy Qdiscs and classifiers.

  Protocols:

   - Support IPv4 big TCP (TSO frames larger than 64kB).

   - Add IP_LOCAL_PORT_RANGE socket option, to control local port range
     on socket by socket basis.

   - Track and report in procfs number of MPTCP sockets used.

   - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path
     manager.

   - IPv6: don't check net.ipv6.route.max_size and rely on garbage
     collection to free memory (similarly to IPv4).

   - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986).

   - ICMP: add per-rate limit counters.

   - Add support for user scanning requests in ieee802154.

   - Remove static WEP support.

   - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate
     reporting.

   - WiFi 7 EHT channel puncturing support (client & AP).

  BPF:

   - Add a rbtree data structure following the "next-gen data structure"
     precedent set by recently added linked list, that is, by using
     kfunc + kptr instead of adding a new BPF map type.

   - Expose XDP hints via kfuncs with initial support for RX hash and
     timestamp metadata.

   - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to
     better support decap on GRE tunnel devices not operating in collect
     metadata.

   - Improve x86 JIT's codegen for PROBE_MEM runtime error checks.

   - Remove the need for trace_printk_lock for bpf_trace_printk and
     bpf_trace_vprintk helpers.

   - Extend libbpf's bpf_tracing.h support for tracing arguments of
     kprobes/uprobes and syscall as a special case.

   - Significantly reduce the search time for module symbols by
     livepatch and BPF.

   - Enable cpumasks to be used as kptrs, which is useful for tracing
     programs tracking which tasks end up running on which CPUs in
     different time intervals.

   - Add support for BPF trampoline on s390x and riscv64.

   - Add capability to export the XDP features supported by the NIC.

   - Add __bpf_kfunc tag for marking kernel functions as kfuncs.

   - Add cgroup.memory=nobpf kernel parameter option to disable BPF
     memory accounting for container environments.

  Netfilter:

   - Remove the CLUSTERIP target. It has been marked as obsolete for
     years, and we still have WARN splats wrt races of the out-of-band
     /proc interface installed by this target.

   - Add 'destroy' commands to nf_tables. They are identical to the
     existing 'delete' commands, but do not return an error if the
     referenced object (set, chain, rule...) did not exist.

  Driver API:

   - Improve cpumask_local_spread() locality to help NICs set the right
     IRQ affinity on AMD platforms.

   - Separate C22 and C45 MDIO bus transactions more clearly.

   - Introduce new DCB table to control DSCP rewrite on egress.

   - Support configuration of Physical Layer Collision Avoidance (PLCA)
     Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of
     shared medium Ethernet.

   - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing
     preemption of low priority frames by high priority frames.

   - Add support for controlling MACSec offload using netlink SET.

   - Rework devlink instance refcounts to allow registration and
     de-registration under the instance lock. Split the code into
     multiple files, drop some of the unnecessarily granular locks and
     factor out common parts of netlink operation handling.

   - Add TX frame aggregation parameters (for USB drivers).

   - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning
     messages with notifications for debug.

   - Allow offloading of UDP NEW connections via act_ct.

   - Add support for per action HW stats in TC.

   - Support hardware miss to TC action (continue processing in SW from
     a specific point in the action chain).

   - Warn if old Wireless Extension user space interface is used with
     modern cfg80211/mac80211 drivers. Do not support Wireless
     Extensions for Wi-Fi 7 devices at all. Everyone should switch to
     using nl80211 interface instead.

   - Improve the CAN bit timing configuration. Use extack to return
     error messages directly to user space, update the SJW handling,
     including the definition of a new default value that will benefit
     CAN-FD controllers, by increasing their oscillator tolerance.

  New hardware / drivers:

   - Ethernet:
      - nVidia BlueField-3 support (control traffic driver)
      - Ethernet support for imx93 SoCs
      - Motorcomm yt8531 gigabit Ethernet PHY
      - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA)
      - Microchip LAN8841 PHY (incl. cable diagnostics and PTP)
      - Amlogic gxl MDIO mux

   - WiFi:
      - RealTek RTL8188EU (rtl8xxxu)
      - Qualcomm Wi-Fi 7 devices (ath12k)

   - CAN:
      - Renesas R-Car V4H

  Drivers:

   - Bluetooth:
      - Set Per Platform Antenna Gain (PPAG) for Intel controllers.

   - Ethernet NICs:
      - Intel (1G, igc):
         - support TSN / Qbv / packet scheduling features of i226 model
      - Intel (100G, ice):
         - use GNSS subsystem instead of TTY
         - multi-buffer XDP support
         - extend support for GPIO pins to E823 devices
      - nVidia/Mellanox:
         - update the shared buffer configuration on PFC commands
         - implement PTP adjphase function for HW offset control
         - TC support for Geneve and GRE with VF tunnel offload
         - more efficient crypto key management method
         - multi-port eswitch support
      - Netronome/Corigine:
         - add DCB IEEE support
         - support IPsec offloading for NFP3800
      - Freescale/NXP (enetc):
         - support XDP_REDIRECT for XDP non-linear buffers
         - improve reconfig, avoid link flap and waiting for idle
         - support MAC Merge layer
      - Other NICs:
         - sfc/ef100: add basic devlink support for ef100
         - ionic: rx_push mode operation (writing descriptors via MMIO)
         - bnxt: use the auxiliary bus abstraction for RDMA
         - r8169: disable ASPM and reset bus in case of tx timeout
         - cpsw: support QSGMII mode for J721e CPSW9G
         - cpts: support pulse-per-second output
         - ngbe: add an mdio bus driver
         - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing
         - r8152: handle devices with FW with NCM support
         - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation
         - virtio-net: support multi buffer XDP
         - virtio/vsock: replace virtio_vsock_pkt with sk_buff
         - tsnep: XDP support

   - Ethernet high-speed switches:
      - nVidia/Mellanox (mlxsw):
         - add support for latency TLV (in FW control messages)
      - Microchip (sparx5):
         - separate explicit and implicit traffic forwarding rules, make
           the implicit rules always active
         - add support for egress DSCP rewrite
         - IS0 VCAP support (Ingress Classification)
         - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS
           etc.)
         - ES2 VCAP support (Egress Access Control)
         - support for Per-Stream Filtering and Policing (802.1Q,
           8.6.5.1)

   - Ethernet embedded switches:
      - Marvell (mv88e6xxx):
         - add MAB (port auth) offload support
         - enable PTP receive for mv88e6390
      - NXP (ocelot):
         - support MAC Merge layer
         - support for the the vsc7512 internal copper phys
      - Microchip:
         - lan9303: convert to PHYLINK
         - lan966x: support TC flower filter statistics
         - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x
         - lan937x: support Credit Based Shaper configuration
         - ksz9477: support Energy Efficient Ethernet
      - other:
         - qca8k: convert to regmap read/write API, use bulk operations
         - rswitch: Improve TX timestamp accuracy

   - Intel WiFi (iwlwifi):
      - EHT (Wi-Fi 7) rate reporting
      - STEP equalizer support: transfer some STEP (connection to radio
        on platforms with integrated wifi) related parameters from the
        BIOS to the firmware.

   - Qualcomm 802.11ax WiFi (ath11k):
      - IPQ5018 support
      - Fine Timing Measurement (FTM) responder role support
      - channel 177 support

   - MediaTek WiFi (mt76):
      - per-PHY LED support
      - mt7996: EHT (Wi-Fi 7) support
      - Wireless Ethernet Dispatch (WED) reset support
      - switch to using page pool allocator

   - RealTek WiFi (rtw89):
      - support new version of Bluetooth co-existance

   - Mobile:
      - rmnet: support TX aggregation"

* tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits)
  page_pool: add a comment explaining the fragment counter usage
  net: ethtool: fix __ethtool_dev_mm_supported() implementation
  ethtool: pse-pd: Fix double word in comments
  xsk: add linux/vmalloc.h to xsk.c
  sefltests: netdevsim: wait for devlink instance after netns removal
  selftest: fib_tests: Always cleanup before exit
  net/mlx5e: Align IPsec ASO result memory to be as required by hardware
  net/mlx5e: TC, Set CT miss to the specific ct action instance
  net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG
  net/mlx5: Refactor tc miss handling to a single function
  net/mlx5: Kconfig: Make tc offload depend on tc skb extension
  net/sched: flower: Support hardware miss to tc action
  net/sched: flower: Move filter handle initialization earlier
  net/sched: cls_api: Support hardware miss to tc action
  net/sched: Rename user cookie and act cookie
  sfc: fix builds without CONFIG_RTC_LIB
  sfc: clean up some inconsistent indentings
  net/mlx4_en: Introduce flexible array to silence overflow warning
  net: lan966x: Fix possible deadlock inside PTP
  net/ulp: Remove redundant ->clone() test in inet_clone_ulp().
  ...
---
 drivers/infiniband/hw/mlx5/srq_cmd.c | 774 +++++++++++++++++++++++++++++++++++
 1 file changed, 774 insertions(+)
 create mode 100644 drivers/infiniband/hw/mlx5/srq_cmd.c

(limited to 'drivers/infiniband/hw/mlx5/srq_cmd.c')

diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
new file mode 100644
index 000000000..8b3385396
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -0,0 +1,774 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+#include "srq.h"
+#include "qp.h"
+
+static int get_pas_size(struct mlx5_srq_attr *in)
+{
+	u32 log_page_size = in->log_page_size + 12;
+	u32 log_srq_size  = in->log_size;
+	u32 log_rq_stride = in->wqe_shift;
+	u32 page_offset   = in->page_offset;
+	u32 po_quanta	  = 1 << (log_page_size - 6);
+	u32 rq_sz	  = 1 << (log_srq_size + 4 + log_rq_stride);
+	u32 page_size	  = 1 << log_page_size;
+	u32 rq_sz_po      = rq_sz + (page_offset * po_quanta);
+	u32 rq_num_pas    = DIV_ROUND_UP(rq_sz_po, page_size);
+
+	return rq_num_pas * sizeof(u64);
+}
+
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
+{
+	MLX5_SET(wq,   wq, wq_signature,  !!(in->flags
+		 & MLX5_SRQ_FLAG_WQ_SIG));
+	MLX5_SET(wq,   wq, log_wq_pg_sz,  in->log_page_size);
+	MLX5_SET(wq,   wq, log_wq_stride, in->wqe_shift + 4);
+	MLX5_SET(wq,   wq, log_wq_sz,     in->log_size);
+	MLX5_SET(wq,   wq, page_offset,   in->page_offset);
+	MLX5_SET(wq,   wq, lwm,		  in->lwm);
+	MLX5_SET(wq,   wq, pd,		  in->pd);
+	MLX5_SET64(wq, wq, dbr_addr,	  in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+	MLX5_SET(srqc,   srqc, wq_signature,  !!(in->flags
+		 & MLX5_SRQ_FLAG_WQ_SIG));
+	MLX5_SET(srqc,   srqc, log_page_size, in->log_page_size);
+	MLX5_SET(srqc,   srqc, log_rq_stride, in->wqe_shift);
+	MLX5_SET(srqc,   srqc, log_srq_size,  in->log_size);
+	MLX5_SET(srqc,   srqc, page_offset,   in->page_offset);
+	MLX5_SET(srqc,	 srqc, lwm,	      in->lwm);
+	MLX5_SET(srqc,	 srqc, pd,	      in->pd);
+	MLX5_SET64(srqc, srqc, dbr_addr,      in->db_record);
+	MLX5_SET(srqc,	 srqc, xrcd,	      in->xrcd);
+	MLX5_SET(srqc,	 srqc, cqn,	      in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+	if (MLX5_GET(wq, wq, wq_signature))
+		in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+	in->log_page_size = MLX5_GET(wq,   wq, log_wq_pg_sz);
+	in->wqe_shift	  = MLX5_GET(wq,   wq, log_wq_stride) - 4;
+	in->log_size	  = MLX5_GET(wq,   wq, log_wq_sz);
+	in->page_offset   = MLX5_GET(wq,   wq, page_offset);
+	in->lwm		  = MLX5_GET(wq,   wq, lwm);
+	in->pd		  = MLX5_GET(wq,   wq, pd);
+	in->db_record	  = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+	if (MLX5_GET(srqc, srqc, wq_signature))
+		in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+	in->log_page_size = MLX5_GET(srqc,   srqc, log_page_size);
+	in->wqe_shift	  = MLX5_GET(srqc,   srqc, log_rq_stride);
+	in->log_size	  = MLX5_GET(srqc,   srqc, log_srq_size);
+	in->page_offset   = MLX5_GET(srqc,   srqc, page_offset);
+	in->lwm		  = MLX5_GET(srqc,   srqc, lwm);
+	in->pd		  = MLX5_GET(srqc,   srqc, pd);
+	in->db_record	  = MLX5_GET64(srqc, srqc, dbr_addr);
+}
+
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
+{
+	struct mlx5_srq_table *table = &dev->srq_table;
+	struct mlx5_core_srq *srq;
+
+	xa_lock_irq(&table->array);
+	srq = xa_load(&table->array, srqn);
+	if (srq)
+		refcount_inc(&srq->common.refcount);
+	xa_unlock_irq(&table->array);
+
+	return srq;
+}
+
+static int __set_srq_page_size(struct mlx5_srq_attr *in,
+			       unsigned long page_size)
+{
+	if (!page_size)
+		return -EINVAL;
+	in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT;
+
+	if (WARN_ON(get_pas_size(in) !=
+		    ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64)))
+		return -EINVAL;
+	return 0;
+}
+
+#define set_srq_page_size(in, typ, log_pgsz_fld)                               \
+	__set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff(           \
+					(in)->umem, typ, log_pgsz_fld,         \
+					MLX5_ADAPTER_PAGE_SHIFT, page_offset,  \
+					64, &(in)->page_offset))
+
+static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			  struct mlx5_srq_attr *in)
+{
+	u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+	void *create_in;
+	void *srqc;
+	void *pas;
+	int pas_size;
+	int inlen;
+	int err;
+
+	if (in->umem) {
+		err = set_srq_page_size(in, srqc, log_page_size);
+		if (err)
+			return err;
+	}
+
+	pas_size  = get_pas_size(in);
+	inlen	  = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+	create_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!create_in)
+		return -ENOMEM;
+
+	MLX5_SET(create_srq_in, create_in, uid, in->uid);
+	srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+	pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
+
+	set_srqc(srqc, in);
+	if (in->umem)
+		mlx5_ib_populate_pas(
+			in->umem,
+			1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+			pas, 0);
+	else
+		memcpy(pas, in->pas, pas_size);
+
+	MLX5_SET(create_srq_in, create_in, opcode,
+		 MLX5_CMD_OP_CREATE_SRQ);
+
+	err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+			    sizeof(create_out));
+	kvfree(create_in);
+	if (!err) {
+		srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+		srq->uid = in->uid;
+	}
+
+	return err;
+}
+
+static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_srq_in)] = {};
+
+	MLX5_SET(destroy_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+	MLX5_SET(destroy_srq_in, in, srqn, srq->srqn);
+	MLX5_SET(destroy_srq_in, in, uid, srq->uid);
+
+	return mlx5_cmd_exec_in(dev->mdev, destroy_srq, in);
+}
+
+static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+		       u16 lwm, int is_srq)
+{
+	u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {};
+
+	MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+	MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+	MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+	MLX5_SET(arm_rq_in, in, lwm, lwm);
+	MLX5_SET(arm_rq_in, in, uid, srq->uid);
+
+	return mlx5_cmd_exec_in(dev->mdev, arm_rq, in);
+}
+
+static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			 struct mlx5_srq_attr *out)
+{
+	u32 in[MLX5_ST_SZ_DW(query_srq_in)] = {};
+	u32 *srq_out;
+	void *srqc;
+	int err;
+
+	srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
+	if (!srq_out)
+		return -ENOMEM;
+
+	MLX5_SET(query_srq_in, in, opcode, MLX5_CMD_OP_QUERY_SRQ);
+	MLX5_SET(query_srq_in, in, srqn, srq->srqn);
+	err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, srq_out);
+	if (err)
+		goto out;
+
+	srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+	get_srqc(srqc, out);
+	if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+		out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+	kvfree(srq_out);
+	return err;
+}
+
+static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+			      struct mlx5_core_srq *srq,
+			      struct mlx5_srq_attr *in)
+{
+	u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+	void *create_in;
+	void *xrc_srqc;
+	void *pas;
+	int pas_size;
+	int inlen;
+	int err;
+
+	if (in->umem) {
+		err = set_srq_page_size(in, xrc_srqc, log_page_size);
+		if (err)
+			return err;
+	}
+
+	pas_size  = get_pas_size(in);
+	inlen	  = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
+	create_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!create_in)
+		return -ENOMEM;
+
+	MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
+	xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
+				xrc_srq_context_entry);
+	pas	 = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
+
+	set_srqc(xrc_srqc, in);
+	MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
+	if (in->umem)
+		mlx5_ib_populate_pas(
+			in->umem,
+			1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+			pas, 0);
+	else
+		memcpy(pas, in->pas, pas_size);
+	MLX5_SET(create_xrc_srq_in, create_in, opcode,
+		 MLX5_CMD_OP_CREATE_XRC_SRQ);
+
+	memset(create_out, 0, sizeof(create_out));
+	err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+			    sizeof(create_out));
+	if (err)
+		goto out;
+
+	srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+	srq->uid = in->uid;
+out:
+	kvfree(create_in);
+	return err;
+}
+
+static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+			       struct mlx5_core_srq *srq)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {};
+
+	MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
+	MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, srq->srqn);
+	MLX5_SET(destroy_xrc_srq_in, in, uid, srq->uid);
+
+	return mlx5_cmd_exec_in(dev->mdev, destroy_xrc_srq, in);
+}
+
+static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			   u16 lwm)
+{
+	u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {};
+
+	MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+	MLX5_SET(arm_xrc_srq_in, in, op_mod,
+		 MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+	MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, srq->srqn);
+	MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
+	MLX5_SET(arm_xrc_srq_in, in, uid, srq->uid);
+
+	return  mlx5_cmd_exec_in(dev->mdev, arm_xrc_srq, in);
+}
+
+static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+			     struct mlx5_core_srq *srq,
+			     struct mlx5_srq_attr *out)
+{
+	u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {};
+	u32 *xrcsrq_out;
+	void *xrc_srqc;
+	int err;
+
+	xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
+	if (!xrcsrq_out)
+		return -ENOMEM;
+
+	MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ);
+	MLX5_SET(query_xrc_srq_in, in, xrc_srqn, srq->srqn);
+
+	err = mlx5_cmd_exec_inout(dev->mdev, query_xrc_srq, in, xrcsrq_out);
+	if (err)
+		goto out;
+
+	xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
+				xrc_srq_context_entry);
+	get_srqc(xrc_srqc, out);
+	if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+		out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+	kvfree(xrcsrq_out);
+	return err;
+}
+
+static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			  struct mlx5_srq_attr *in)
+{
+	void *create_out = NULL;
+	void *create_in = NULL;
+	void *rmpc;
+	void *wq;
+	void *pas;
+	int pas_size;
+	int outlen;
+	int inlen;
+	int err;
+
+	if (in->umem) {
+		err = set_srq_page_size(in, wq, log_wq_pg_sz);
+		if (err)
+			return err;
+	}
+
+	pas_size = get_pas_size(in);
+	inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+	outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
+	create_in = kvzalloc(inlen, GFP_KERNEL);
+	create_out = kvzalloc(outlen, GFP_KERNEL);
+	if (!create_in || !create_out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+	wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+	MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+	MLX5_SET(create_rmp_in, create_in, uid, in->uid);
+	pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas);
+
+	set_wq(wq, in);
+	if (in->umem)
+		mlx5_ib_populate_pas(
+			in->umem,
+			1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+			pas, 0);
+	else
+		memcpy(pas, in->pas, pas_size);
+
+	MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
+	err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
+	if (!err) {
+		srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
+		srq->uid = in->uid;
+	}
+
+out:
+	kvfree(create_in);
+	kvfree(create_out);
+	return err;
+}
+
+static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
+
+	MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+	MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+	MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+	return mlx5_cmd_exec_in(dev->mdev, destroy_rmp, in);
+}
+
+static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+		       u16 lwm)
+{
+	void *out = NULL;
+	void *in = NULL;
+	void *rmpc;
+	void *wq;
+	void *bitmask;
+	int outlen;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
+	outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!in || !out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rmpc =	  MLX5_ADDR_OF(modify_rmp_in,   in,   ctx);
+	bitmask = MLX5_ADDR_OF(modify_rmp_in,   in,   bitmask);
+	wq   =	  MLX5_ADDR_OF(rmpc,	        rmpc, wq);
+
+	MLX5_SET(modify_rmp_in, in,	 rmp_state, MLX5_RMPC_STATE_RDY);
+	MLX5_SET(modify_rmp_in, in,	 rmpn,      srq->srqn);
+	MLX5_SET(modify_rmp_in, in, uid, srq->uid);
+	MLX5_SET(wq,		wq,	 lwm,	    lwm);
+	MLX5_SET(rmp_bitmask,	bitmask, lwm,	    1);
+	MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+	MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
+
+	err = mlx5_cmd_exec_inout(dev->mdev, modify_rmp, in, out);
+
+out:
+	kvfree(in);
+	kvfree(out);
+	return err;
+}
+
+static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			 struct mlx5_srq_attr *out)
+{
+	u32 *rmp_out = NULL;
+	u32 *rmp_in = NULL;
+	void *rmpc;
+	int outlen;
+	int inlen;
+	int err;
+
+	outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+	inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
+
+	rmp_out = kvzalloc(outlen, GFP_KERNEL);
+	rmp_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!rmp_out || !rmp_in) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
+	MLX5_SET(query_rmp_in, rmp_in, rmpn,   srq->srqn);
+	err = mlx5_cmd_exec_inout(dev->mdev, query_rmp, rmp_in, rmp_out);
+	if (err)
+		goto out;
+
+	rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
+	get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+	if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+		out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+	kvfree(rmp_out);
+	kvfree(rmp_in);
+	return err;
+}
+
+static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			  struct mlx5_srq_attr *in)
+{
+	u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
+	void *create_in;
+	void *xrqc;
+	void *wq;
+	void *pas;
+	int pas_size;
+	int inlen;
+	int err;
+
+	if (in->umem) {
+		err = set_srq_page_size(in, wq, log_wq_pg_sz);
+		if (err)
+			return err;
+	}
+
+	pas_size = get_pas_size(in);
+	inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
+	create_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!create_in)
+		return -ENOMEM;
+
+	xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
+	wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+	pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas);
+
+	set_wq(wq, in);
+	if (in->umem)
+		mlx5_ib_populate_pas(
+			in->umem,
+			1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+			pas, 0);
+	else
+		memcpy(pas, in->pas, pas_size);
+
+	if (in->type == IB_SRQT_TM) {
+		MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
+		if (in->flags & MLX5_SRQ_FLAG_RNDV)
+			MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
+		MLX5_SET(xrqc, xrqc,
+			 tag_matching_topology_context.log_matching_list_sz,
+			 in->tm_log_list_size);
+	}
+	MLX5_SET(xrqc, xrqc, user_index, in->user_index);
+	MLX5_SET(xrqc, xrqc, cqn, in->cqn);
+	MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+	MLX5_SET(create_xrq_in, create_in, uid, in->uid);
+	err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+			    sizeof(create_out));
+	kvfree(create_in);
+	if (!err) {
+		srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+		srq->uid = in->uid;
+	}
+
+	return err;
+}
+
+static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {};
+
+	MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+	MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+	MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
+
+	return mlx5_cmd_exec_in(dev->mdev, destroy_xrq, in);
+}
+
+static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
+		       struct mlx5_core_srq *srq,
+		       u16 lwm)
+{
+	u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {};
+
+	MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+	MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+	MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+	MLX5_SET(arm_rq_in, in, lwm, lwm);
+	MLX5_SET(arm_rq_in, in, uid, srq->uid);
+
+	return mlx5_cmd_exec_in(dev->mdev, arm_rq, in);
+}
+
+static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			 struct mlx5_srq_attr *out)
+{
+	u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {};
+	u32 *xrq_out;
+	int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
+	void *xrqc;
+	int err;
+
+	xrq_out = kvzalloc(outlen, GFP_KERNEL);
+	if (!xrq_out)
+		return -ENOMEM;
+
+	MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
+	MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
+
+	err = mlx5_cmd_exec_inout(dev->mdev, query_xrq, in, xrq_out);
+	if (err)
+		goto out;
+
+	xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
+	get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
+	if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
+		out->flags |= MLX5_SRQ_FLAG_ERR;
+	out->tm_next_tag =
+		MLX5_GET(xrqc, xrqc,
+			 tag_matching_topology_context.append_next_index);
+	out->tm_hw_phase_cnt =
+		MLX5_GET(xrqc, xrqc,
+			 tag_matching_topology_context.hw_phase_cnt);
+	out->tm_sw_phase_cnt =
+		MLX5_GET(xrqc, xrqc,
+			 tag_matching_topology_context.sw_phase_cnt);
+
+out:
+	kvfree(xrq_out);
+	return err;
+}
+
+static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			    struct mlx5_srq_attr *in)
+{
+	if (!dev->mdev->issi)
+		return create_srq_cmd(dev, srq, in);
+	switch (srq->common.res) {
+	case MLX5_RES_XSRQ:
+		return create_xrc_srq_cmd(dev, srq, in);
+	case MLX5_RES_XRQ:
+		return create_xrq_cmd(dev, srq, in);
+	default:
+		return create_rmp_cmd(dev, srq, in);
+	}
+}
+
+static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+	if (!dev->mdev->issi)
+		return destroy_srq_cmd(dev, srq);
+	switch (srq->common.res) {
+	case MLX5_RES_XSRQ:
+		return destroy_xrc_srq_cmd(dev, srq);
+	case MLX5_RES_XRQ:
+		return destroy_xrq_cmd(dev, srq);
+	default:
+		return destroy_rmp_cmd(dev, srq);
+	}
+}
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+			struct mlx5_srq_attr *in)
+{
+	struct mlx5_srq_table *table = &dev->srq_table;
+	int err;
+
+	switch (in->type) {
+	case IB_SRQT_XRC:
+		srq->common.res = MLX5_RES_XSRQ;
+		break;
+	case IB_SRQT_TM:
+		srq->common.res = MLX5_RES_XRQ;
+		break;
+	default:
+		srq->common.res = MLX5_RES_SRQ;
+	}
+
+	err = create_srq_split(dev, srq, in);
+	if (err)
+		return err;
+
+	refcount_set(&srq->common.refcount, 1);
+	init_completion(&srq->common.free);
+
+	err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL));
+	if (err)
+		goto err_destroy_srq_split;
+
+	return 0;
+
+err_destroy_srq_split:
+	destroy_srq_split(dev, srq);
+
+	return err;
+}
+
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+	struct mlx5_srq_table *table = &dev->srq_table;
+	struct mlx5_core_srq *tmp;
+	int err;
+
+	/* Delete entry, but leave index occupied */
+	tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0);
+	if (WARN_ON(tmp != srq))
+		return xa_err(tmp) ?: -EINVAL;
+
+	err = destroy_srq_split(dev, srq);
+	if (err) {
+		/*
+		 * We don't need to check returned result for an error,
+		 * because  we are storing in pre-allocated space xarray
+		 * entry and it can't fail at this stage.
+		 */
+		xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
+		return err;
+	}
+	xa_erase_irq(&table->array, srq->srqn);
+
+	mlx5_core_res_put(&srq->common);
+	wait_for_completion(&srq->common.free);
+	return 0;
+}
+
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+		       struct mlx5_srq_attr *out)
+{
+	if (!dev->mdev->issi)
+		return query_srq_cmd(dev, srq, out);
+	switch (srq->common.res) {
+	case MLX5_RES_XSRQ:
+		return query_xrc_srq_cmd(dev, srq, out);
+	case MLX5_RES_XRQ:
+		return query_xrq_cmd(dev, srq, out);
+	default:
+		return query_rmp_cmd(dev, srq, out);
+	}
+}
+
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+		     u16 lwm, int is_srq)
+{
+	if (!dev->mdev->issi)
+		return arm_srq_cmd(dev, srq, lwm, is_srq);
+	switch (srq->common.res) {
+	case MLX5_RES_XSRQ:
+		return arm_xrc_srq_cmd(dev, srq, lwm);
+	case MLX5_RES_XRQ:
+		return arm_xrq_cmd(dev, srq, lwm);
+	default:
+		return arm_rmp_cmd(dev, srq, lwm);
+	}
+}
+
+static int srq_event_notifier(struct notifier_block *nb,
+			      unsigned long type, void *data)
+{
+	struct mlx5_srq_table *table;
+	struct mlx5_core_srq *srq;
+	struct mlx5_eqe *eqe;
+	u32 srqn;
+
+	if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
+	    type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
+		return NOTIFY_DONE;
+
+	table = container_of(nb, struct mlx5_srq_table, nb);
+
+	eqe = data;
+	srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+
+	xa_lock(&table->array);
+	srq = xa_load(&table->array, srqn);
+	if (srq)
+		refcount_inc(&srq->common.refcount);
+	xa_unlock(&table->array);
+
+	if (!srq)
+		return NOTIFY_OK;
+
+	srq->event(srq, eqe->type);
+
+	mlx5_core_res_put(&srq->common);
+
+	return NOTIFY_OK;
+}
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_srq_table *table = &dev->srq_table;
+
+	memset(table, 0, sizeof(*table));
+	xa_init_flags(&table->array, XA_FLAGS_LOCK_IRQ);
+
+	table->nb.notifier_call = srq_event_notifier;
+	mlx5_notifier_register(dev->mdev, &table->nb);
+
+	return 0;
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_srq_table *table = &dev->srq_table;
+
+	mlx5_notifier_unregister(dev->mdev, &table->nb);
+}
-- 
cgit v1.2.3