From 5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 21 Feb 2023 18:24:12 -0800
Subject: Merge tag 'net-next-6.3' of
 git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from Jakub Kicinski:
 "Core:

   - Add dedicated kmem_cache for typical/small skb->head, avoid having
     to access struct page at kfree time, and improve memory use.

   - Introduce sysctl to set default RPS configuration for new netdevs.

   - Define Netlink protocol specification format which can be used to
     describe messages used by each family and auto-generate parsers.
     Add tools for generating kernel data structures and uAPI headers.

   - Expose all net/core sysctls inside netns.

   - Remove 4s sleep in netpoll if carrier is instantly detected on
     boot.

   - Add configurable limit of MDB entries per port, and port-vlan.

   - Continue populating drop reasons throughout the stack.

   - Retire a handful of legacy Qdiscs and classifiers.

  Protocols:

   - Support IPv4 big TCP (TSO frames larger than 64kB).

   - Add IP_LOCAL_PORT_RANGE socket option, to control local port range
     on socket by socket basis.

   - Track and report in procfs number of MPTCP sockets used.

   - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path
     manager.

   - IPv6: don't check net.ipv6.route.max_size and rely on garbage
     collection to free memory (similarly to IPv4).

   - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986).

   - ICMP: add per-rate limit counters.

   - Add support for user scanning requests in ieee802154.

   - Remove static WEP support.

   - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate
     reporting.

   - WiFi 7 EHT channel puncturing support (client & AP).

  BPF:

   - Add a rbtree data structure following the "next-gen data structure"
     precedent set by recently added linked list, that is, by using
     kfunc + kptr instead of adding a new BPF map type.

   - Expose XDP hints via kfuncs with initial support for RX hash and
     timestamp metadata.

   - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to
     better support decap on GRE tunnel devices not operating in collect
     metadata.

   - Improve x86 JIT's codegen for PROBE_MEM runtime error checks.

   - Remove the need for trace_printk_lock for bpf_trace_printk and
     bpf_trace_vprintk helpers.

   - Extend libbpf's bpf_tracing.h support for tracing arguments of
     kprobes/uprobes and syscall as a special case.

   - Significantly reduce the search time for module symbols by
     livepatch and BPF.

   - Enable cpumasks to be used as kptrs, which is useful for tracing
     programs tracking which tasks end up running on which CPUs in
     different time intervals.

   - Add support for BPF trampoline on s390x and riscv64.

   - Add capability to export the XDP features supported by the NIC.

   - Add __bpf_kfunc tag for marking kernel functions as kfuncs.

   - Add cgroup.memory=nobpf kernel parameter option to disable BPF
     memory accounting for container environments.

  Netfilter:

   - Remove the CLUSTERIP target. It has been marked as obsolete for
     years, and we still have WARN splats wrt races of the out-of-band
     /proc interface installed by this target.

   - Add 'destroy' commands to nf_tables. They are identical to the
     existing 'delete' commands, but do not return an error if the
     referenced object (set, chain, rule...) did not exist.

  Driver API:

   - Improve cpumask_local_spread() locality to help NICs set the right
     IRQ affinity on AMD platforms.

   - Separate C22 and C45 MDIO bus transactions more clearly.

   - Introduce new DCB table to control DSCP rewrite on egress.

   - Support configuration of Physical Layer Collision Avoidance (PLCA)
     Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of
     shared medium Ethernet.

   - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing
     preemption of low priority frames by high priority frames.

   - Add support for controlling MACSec offload using netlink SET.

   - Rework devlink instance refcounts to allow registration and
     de-registration under the instance lock. Split the code into
     multiple files, drop some of the unnecessarily granular locks and
     factor out common parts of netlink operation handling.

   - Add TX frame aggregation parameters (for USB drivers).

   - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning
     messages with notifications for debug.

   - Allow offloading of UDP NEW connections via act_ct.

   - Add support for per action HW stats in TC.

   - Support hardware miss to TC action (continue processing in SW from
     a specific point in the action chain).

   - Warn if old Wireless Extension user space interface is used with
     modern cfg80211/mac80211 drivers. Do not support Wireless
     Extensions for Wi-Fi 7 devices at all. Everyone should switch to
     using nl80211 interface instead.

   - Improve the CAN bit timing configuration. Use extack to return
     error messages directly to user space, update the SJW handling,
     including the definition of a new default value that will benefit
     CAN-FD controllers, by increasing their oscillator tolerance.

  New hardware / drivers:

   - Ethernet:
      - nVidia BlueField-3 support (control traffic driver)
      - Ethernet support for imx93 SoCs
      - Motorcomm yt8531 gigabit Ethernet PHY
      - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA)
      - Microchip LAN8841 PHY (incl. cable diagnostics and PTP)
      - Amlogic gxl MDIO mux

   - WiFi:
      - RealTek RTL8188EU (rtl8xxxu)
      - Qualcomm Wi-Fi 7 devices (ath12k)

   - CAN:
      - Renesas R-Car V4H

  Drivers:

   - Bluetooth:
      - Set Per Platform Antenna Gain (PPAG) for Intel controllers.

   - Ethernet NICs:
      - Intel (1G, igc):
         - support TSN / Qbv / packet scheduling features of i226 model
      - Intel (100G, ice):
         - use GNSS subsystem instead of TTY
         - multi-buffer XDP support
         - extend support for GPIO pins to E823 devices
      - nVidia/Mellanox:
         - update the shared buffer configuration on PFC commands
         - implement PTP adjphase function for HW offset control
         - TC support for Geneve and GRE with VF tunnel offload
         - more efficient crypto key management method
         - multi-port eswitch support
      - Netronome/Corigine:
         - add DCB IEEE support
         - support IPsec offloading for NFP3800
      - Freescale/NXP (enetc):
         - support XDP_REDIRECT for XDP non-linear buffers
         - improve reconfig, avoid link flap and waiting for idle
         - support MAC Merge layer
      - Other NICs:
         - sfc/ef100: add basic devlink support for ef100
         - ionic: rx_push mode operation (writing descriptors via MMIO)
         - bnxt: use the auxiliary bus abstraction for RDMA
         - r8169: disable ASPM and reset bus in case of tx timeout
         - cpsw: support QSGMII mode for J721e CPSW9G
         - cpts: support pulse-per-second output
         - ngbe: add an mdio bus driver
         - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing
         - r8152: handle devices with FW with NCM support
         - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation
         - virtio-net: support multi buffer XDP
         - virtio/vsock: replace virtio_vsock_pkt with sk_buff
         - tsnep: XDP support

   - Ethernet high-speed switches:
      - nVidia/Mellanox (mlxsw):
         - add support for latency TLV (in FW control messages)
      - Microchip (sparx5):
         - separate explicit and implicit traffic forwarding rules, make
           the implicit rules always active
         - add support for egress DSCP rewrite
         - IS0 VCAP support (Ingress Classification)
         - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS
           etc.)
         - ES2 VCAP support (Egress Access Control)
         - support for Per-Stream Filtering and Policing (802.1Q,
           8.6.5.1)

   - Ethernet embedded switches:
      - Marvell (mv88e6xxx):
         - add MAB (port auth) offload support
         - enable PTP receive for mv88e6390
      - NXP (ocelot):
         - support MAC Merge layer
         - support for the the vsc7512 internal copper phys
      - Microchip:
         - lan9303: convert to PHYLINK
         - lan966x: support TC flower filter statistics
         - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x
         - lan937x: support Credit Based Shaper configuration
         - ksz9477: support Energy Efficient Ethernet
      - other:
         - qca8k: convert to regmap read/write API, use bulk operations
         - rswitch: Improve TX timestamp accuracy

   - Intel WiFi (iwlwifi):
      - EHT (Wi-Fi 7) rate reporting
      - STEP equalizer support: transfer some STEP (connection to radio
        on platforms with integrated wifi) related parameters from the
        BIOS to the firmware.

   - Qualcomm 802.11ax WiFi (ath11k):
      - IPQ5018 support
      - Fine Timing Measurement (FTM) responder role support
      - channel 177 support

   - MediaTek WiFi (mt76):
      - per-PHY LED support
      - mt7996: EHT (Wi-Fi 7) support
      - Wireless Ethernet Dispatch (WED) reset support
      - switch to using page pool allocator

   - RealTek WiFi (rtw89):
      - support new version of Bluetooth co-existance

   - Mobile:
      - rmnet: support TX aggregation"

* tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits)
  page_pool: add a comment explaining the fragment counter usage
  net: ethtool: fix __ethtool_dev_mm_supported() implementation
  ethtool: pse-pd: Fix double word in comments
  xsk: add linux/vmalloc.h to xsk.c
  sefltests: netdevsim: wait for devlink instance after netns removal
  selftest: fib_tests: Always cleanup before exit
  net/mlx5e: Align IPsec ASO result memory to be as required by hardware
  net/mlx5e: TC, Set CT miss to the specific ct action instance
  net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG
  net/mlx5: Refactor tc miss handling to a single function
  net/mlx5: Kconfig: Make tc offload depend on tc skb extension
  net/sched: flower: Support hardware miss to tc action
  net/sched: flower: Move filter handle initialization earlier
  net/sched: cls_api: Support hardware miss to tc action
  net/sched: Rename user cookie and act cookie
  sfc: fix builds without CONFIG_RTC_LIB
  sfc: clean up some inconsistent indentings
  net/mlx4_en: Introduce flexible array to silence overflow warning
  net: lan966x: Fix possible deadlock inside PTP
  net/ulp: Remove redundant ->clone() test in inet_clone_ulp().
  ...
---
 net/rxrpc/rxperf.c | 624 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 624 insertions(+)
 create mode 100644 net/rxrpc/rxperf.c

(limited to 'net/rxrpc/rxperf.c')

diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
new file mode 100644
index 000000000..4a2e90015
--- /dev/null
+++ b/net/rxrpc/rxperf.c
@@ -0,0 +1,624 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* In-kernel rxperf server for testing purposes.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) "rxperf: " fmt
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+MODULE_DESCRIPTION("rxperf test server (afs)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+#define RXPERF_PORT		7009
+#define RX_PERF_SERVICE		147
+#define RX_PERF_VERSION		3
+#define RX_PERF_SEND		0
+#define RX_PERF_RECV		1
+#define RX_PERF_RPC		3
+#define RX_PERF_FILE		4
+#define RX_PERF_MAGIC_COOKIE	0x4711
+
+struct rxperf_proto_params {
+	__be32		version;
+	__be32		type;
+	__be32		rsize;
+	__be32		wsize;
+} __packed;
+
+static const u8 rxperf_magic_cookie[] = { 0x00, 0x00, 0x47, 0x11 };
+static const u8 secret[8] = { 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+
+enum rxperf_call_state {
+	RXPERF_CALL_SV_AWAIT_PARAMS,	/* Server: Awaiting parameter block */
+	RXPERF_CALL_SV_AWAIT_REQUEST,	/* Server: Awaiting request data */
+	RXPERF_CALL_SV_REPLYING,	/* Server: Replying */
+	RXPERF_CALL_SV_AWAIT_ACK,	/* Server: Awaiting final ACK */
+	RXPERF_CALL_COMPLETE,		/* Completed or failed */
+};
+
+struct rxperf_call {
+	struct rxrpc_call	*rxcall;
+	struct iov_iter		iter;
+	struct kvec		kvec[1];
+	struct work_struct	work;
+	const char		*type;
+	size_t			iov_len;
+	size_t			req_len;	/* Size of request blob */
+	size_t			reply_len;	/* Size of reply blob */
+	unsigned int		debug_id;
+	unsigned int		operation_id;
+	struct rxperf_proto_params params;
+	__be32			tmp[2];
+	s32			abort_code;
+	enum rxperf_call_state	state;
+	short			error;
+	unsigned short		unmarshal;
+	u16			service_id;
+	int (*deliver)(struct rxperf_call *call);
+	void (*processor)(struct work_struct *work);
+};
+
+static struct socket *rxperf_socket;
+static struct key *rxperf_sec_keyring;	/* Ring of security/crypto keys */
+static struct workqueue_struct *rxperf_workqueue;
+
+static void rxperf_deliver_to_call(struct work_struct *work);
+static int rxperf_deliver_param_block(struct rxperf_call *call);
+static int rxperf_deliver_request(struct rxperf_call *call);
+static int rxperf_process_call(struct rxperf_call *call);
+static void rxperf_charge_preallocation(struct work_struct *work);
+
+static DECLARE_WORK(rxperf_charge_preallocation_work,
+		    rxperf_charge_preallocation);
+
+static inline void rxperf_set_call_state(struct rxperf_call *call,
+					 enum rxperf_call_state to)
+{
+	call->state = to;
+}
+
+static inline void rxperf_set_call_complete(struct rxperf_call *call,
+					    int error, s32 remote_abort)
+{
+	if (call->state != RXPERF_CALL_COMPLETE) {
+		call->abort_code = remote_abort;
+		call->error = error;
+		call->state = RXPERF_CALL_COMPLETE;
+	}
+}
+
+static void rxperf_rx_discard_new_call(struct rxrpc_call *rxcall,
+				       unsigned long user_call_ID)
+{
+	kfree((struct rxperf_call *)user_call_ID);
+}
+
+static void rxperf_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+			       unsigned long user_call_ID)
+{
+	queue_work(rxperf_workqueue, &rxperf_charge_preallocation_work);
+}
+
+static void rxperf_queue_call_work(struct rxperf_call *call)
+{
+	queue_work(rxperf_workqueue, &call->work);
+}
+
+static void rxperf_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+			     unsigned long call_user_ID)
+{
+	struct rxperf_call *call = (struct rxperf_call *)call_user_ID;
+
+	if (call->state != RXPERF_CALL_COMPLETE)
+		rxperf_queue_call_work(call);
+}
+
+static void rxperf_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
+{
+	struct rxperf_call *call = (struct rxperf_call *)user_call_ID;
+
+	call->rxcall = rxcall;
+}
+
+static void rxperf_notify_end_reply_tx(struct sock *sock,
+				       struct rxrpc_call *rxcall,
+				       unsigned long call_user_ID)
+{
+	rxperf_set_call_state((struct rxperf_call *)call_user_ID,
+			      RXPERF_CALL_SV_AWAIT_ACK);
+}
+
+/*
+ * Charge the incoming call preallocation.
+ */
+static void rxperf_charge_preallocation(struct work_struct *work)
+{
+	struct rxperf_call *call;
+
+	for (;;) {
+		call = kzalloc(sizeof(*call), GFP_KERNEL);
+		if (!call)
+			break;
+
+		call->type		= "unset";
+		call->debug_id		= atomic_inc_return(&rxrpc_debug_id);
+		call->deliver		= rxperf_deliver_param_block;
+		call->state		= RXPERF_CALL_SV_AWAIT_PARAMS;
+		call->service_id	= RX_PERF_SERVICE;
+		call->iov_len		= sizeof(call->params);
+		call->kvec[0].iov_len	= sizeof(call->params);
+		call->kvec[0].iov_base	= &call->params;
+		iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+		INIT_WORK(&call->work, rxperf_deliver_to_call);
+
+		if (rxrpc_kernel_charge_accept(rxperf_socket,
+					       rxperf_notify_rx,
+					       rxperf_rx_attach,
+					       (unsigned long)call,
+					       GFP_KERNEL,
+					       call->debug_id) < 0)
+			break;
+		call = NULL;
+	}
+
+	kfree(call);
+}
+
+/*
+ * Open an rxrpc socket and bind it to be a server for callback notifications
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ */
+static int rxperf_open_socket(void)
+{
+	struct sockaddr_rxrpc srx;
+	struct socket *socket;
+	int ret;
+
+	ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6,
+			       &socket);
+	if (ret < 0)
+		goto error_1;
+
+	socket->sk->sk_allocation = GFP_NOFS;
+
+	/* bind the callback manager's address to make this a server socket */
+	memset(&srx, 0, sizeof(srx));
+	srx.srx_family			= AF_RXRPC;
+	srx.srx_service			= RX_PERF_SERVICE;
+	srx.transport_type		= SOCK_DGRAM;
+	srx.transport_len		= sizeof(srx.transport.sin6);
+	srx.transport.sin6.sin6_family	= AF_INET6;
+	srx.transport.sin6.sin6_port	= htons(RXPERF_PORT);
+
+	ret = rxrpc_sock_set_min_security_level(socket->sk,
+						RXRPC_SECURITY_ENCRYPT);
+	if (ret < 0)
+		goto error_2;
+
+	ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring);
+
+	ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx));
+	if (ret < 0)
+		goto error_2;
+
+	rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call,
+					   rxperf_rx_discard_new_call);
+
+	ret = kernel_listen(socket, INT_MAX);
+	if (ret < 0)
+		goto error_2;
+
+	rxperf_socket = socket;
+	rxperf_charge_preallocation(&rxperf_charge_preallocation_work);
+	return 0;
+
+error_2:
+	sock_release(socket);
+error_1:
+	pr_err("Can't set up rxperf socket: %d\n", ret);
+	return ret;
+}
+
+/*
+ * close the rxrpc socket rxperf was using
+ */
+static void rxperf_close_socket(void)
+{
+	kernel_listen(rxperf_socket, 0);
+	kernel_sock_shutdown(rxperf_socket, SHUT_RDWR);
+	flush_workqueue(rxperf_workqueue);
+	sock_release(rxperf_socket);
+}
+
+/*
+ * Log remote abort codes that indicate that we have a protocol disagreement
+ * with the server.
+ */
+static void rxperf_log_error(struct rxperf_call *call, s32 remote_abort)
+{
+	static int max = 0;
+	const char *msg;
+	int m;
+
+	switch (remote_abort) {
+	case RX_EOF:		 msg = "unexpected EOF";	break;
+	case RXGEN_CC_MARSHAL:	 msg = "client marshalling";	break;
+	case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling";	break;
+	case RXGEN_SS_MARSHAL:	 msg = "server marshalling";	break;
+	case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling";	break;
+	case RXGEN_DECODE:	 msg = "opcode decode";		break;
+	case RXGEN_SS_XDRFREE:	 msg = "server XDR cleanup";	break;
+	case RXGEN_CC_XDRFREE:	 msg = "client XDR cleanup";	break;
+	case -32:		 msg = "insufficient data";	break;
+	default:
+		return;
+	}
+
+	m = max;
+	if (m < 3) {
+		max = m + 1;
+		pr_info("Peer reported %s failure on %s\n", msg, call->type);
+	}
+}
+
+/*
+ * deliver messages to a call
+ */
+static void rxperf_deliver_to_call(struct work_struct *work)
+{
+	struct rxperf_call *call = container_of(work, struct rxperf_call, work);
+	enum rxperf_call_state state;
+	u32 abort_code, remote_abort = 0;
+	int ret = 0;
+
+	if (call->state == RXPERF_CALL_COMPLETE)
+		return;
+
+	while (state = call->state,
+	       state == RXPERF_CALL_SV_AWAIT_PARAMS ||
+	       state == RXPERF_CALL_SV_AWAIT_REQUEST ||
+	       state == RXPERF_CALL_SV_AWAIT_ACK
+	       ) {
+		if (state == RXPERF_CALL_SV_AWAIT_ACK) {
+			if (!rxrpc_kernel_check_life(rxperf_socket, call->rxcall))
+				goto call_complete;
+			return;
+		}
+
+		ret = call->deliver(call);
+		if (ret == 0)
+			ret = rxperf_process_call(call);
+
+		switch (ret) {
+		case 0:
+			continue;
+		case -EINPROGRESS:
+		case -EAGAIN:
+			return;
+		case -ECONNABORTED:
+			rxperf_log_error(call, call->abort_code);
+			goto call_complete;
+		case -EOPNOTSUPP:
+			abort_code = RXGEN_OPCODE;
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						abort_code, ret,
+						rxperf_abort_op_not_supported);
+			goto call_complete;
+		case -ENOTSUPP:
+			abort_code = RX_USER_ABORT;
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						abort_code, ret,
+						rxperf_abort_op_not_supported);
+			goto call_complete;
+		case -EIO:
+			pr_err("Call %u in bad state %u\n",
+			       call->debug_id, call->state);
+			fallthrough;
+		case -ENODATA:
+		case -EBADMSG:
+		case -EMSGSIZE:
+		case -ENOMEM:
+		case -EFAULT:
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RXGEN_SS_UNMARSHAL, ret,
+						rxperf_abort_unmarshal_error);
+			goto call_complete;
+		default:
+			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+						RX_CALL_DEAD, ret,
+						rxperf_abort_general_error);
+			goto call_complete;
+		}
+	}
+
+call_complete:
+	rxperf_set_call_complete(call, ret, remote_abort);
+	/* The call may have been requeued */
+	rxrpc_kernel_end_call(rxperf_socket, call->rxcall);
+	cancel_work(&call->work);
+	kfree(call);
+}
+
+/*
+ * Extract a piece of data from the received data socket buffers.
+ */
+static int rxperf_extract_data(struct rxperf_call *call, bool want_more)
+{
+	u32 remote_abort = 0;
+	int ret;
+
+	ret = rxrpc_kernel_recv_data(rxperf_socket, call->rxcall, &call->iter,
+				     &call->iov_len, want_more, &remote_abort,
+				     &call->service_id);
+	pr_debug("Extract i=%zu l=%zu m=%u ret=%d\n",
+		 iov_iter_count(&call->iter), call->iov_len, want_more, ret);
+	if (ret == 0 || ret == -EAGAIN)
+		return ret;
+
+	if (ret == 1) {
+		switch (call->state) {
+		case RXPERF_CALL_SV_AWAIT_REQUEST:
+			rxperf_set_call_state(call, RXPERF_CALL_SV_REPLYING);
+			break;
+		case RXPERF_CALL_COMPLETE:
+			pr_debug("premature completion %d", call->error);
+			return call->error;
+		default:
+			break;
+		}
+		return 0;
+	}
+
+	rxperf_set_call_complete(call, ret, remote_abort);
+	return ret;
+}
+
+/*
+ * Grab the operation ID from an incoming manager call.
+ */
+static int rxperf_deliver_param_block(struct rxperf_call *call)
+{
+	u32 version;
+	int ret;
+
+	/* Extract the parameter block */
+	ret = rxperf_extract_data(call, true);
+	if (ret < 0)
+		return ret;
+
+	version			= ntohl(call->params.version);
+	call->operation_id	= ntohl(call->params.type);
+	call->deliver		= rxperf_deliver_request;
+
+	if (version != RX_PERF_VERSION) {
+		pr_info("Version mismatch %x\n", version);
+		return -ENOTSUPP;
+	}
+
+	switch (call->operation_id) {
+	case RX_PERF_SEND:
+		call->type = "send";
+		call->reply_len = 0;
+		call->iov_len = 4;	/* Expect req size */
+		break;
+	case RX_PERF_RECV:
+		call->type = "recv";
+		call->req_len = 0;
+		call->iov_len = 4;	/* Expect reply size */
+		break;
+	case RX_PERF_RPC:
+		call->type = "rpc";
+		call->iov_len = 8;	/* Expect req size and reply size */
+		break;
+	case RX_PERF_FILE:
+		call->type = "file";
+		fallthrough;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	rxperf_set_call_state(call, RXPERF_CALL_SV_AWAIT_REQUEST);
+	return call->deliver(call);
+}
+
+/*
+ * Deliver the request data.
+ */
+static int rxperf_deliver_request(struct rxperf_call *call)
+{
+	int ret;
+
+	switch (call->unmarshal) {
+	case 0:
+		call->kvec[0].iov_len	= call->iov_len;
+		call->kvec[0].iov_base	= call->tmp;
+		iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+		call->unmarshal++;
+		fallthrough;
+	case 1:
+		ret = rxperf_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		switch (call->operation_id) {
+		case RX_PERF_SEND:
+			call->type = "send";
+			call->req_len	= ntohl(call->tmp[0]);
+			call->reply_len	= 0;
+			break;
+		case RX_PERF_RECV:
+			call->type = "recv";
+			call->req_len = 0;
+			call->reply_len	= ntohl(call->tmp[0]);
+			break;
+		case RX_PERF_RPC:
+			call->type = "rpc";
+			call->req_len	= ntohl(call->tmp[0]);
+			call->reply_len	= ntohl(call->tmp[1]);
+			break;
+		default:
+			pr_info("Can't parse extra params\n");
+			return -EIO;
+		}
+
+		pr_debug("CALL op=%s rq=%zx rp=%zx\n",
+			 call->type, call->req_len, call->reply_len);
+
+		call->iov_len = call->req_len;
+		iov_iter_discard(&call->iter, READ, call->req_len);
+		call->unmarshal++;
+		fallthrough;
+	case 2:
+		ret = rxperf_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+		call->unmarshal++;
+		fallthrough;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Process a call for which we've received the request.
+ */
+static int rxperf_process_call(struct rxperf_call *call)
+{
+	struct msghdr msg = {};
+	struct bio_vec bv;
+	struct kvec iov[1];
+	ssize_t n;
+	size_t reply_len = call->reply_len, len;
+
+	rxrpc_kernel_set_tx_length(rxperf_socket, call->rxcall,
+				   reply_len + sizeof(rxperf_magic_cookie));
+
+	while (reply_len > 0) {
+		len = min_t(size_t, reply_len, PAGE_SIZE);
+		bvec_set_page(&bv, ZERO_PAGE(0), len, 0);
+		iov_iter_bvec(&msg.msg_iter, WRITE, &bv, 1, len);
+		msg.msg_flags = MSG_MORE;
+		n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg,
+					   len, rxperf_notify_end_reply_tx);
+		if (n < 0)
+			return n;
+		if (n == 0)
+			return -EIO;
+		reply_len -= n;
+	}
+
+	len = sizeof(rxperf_magic_cookie);
+	iov[0].iov_base	= (void *)rxperf_magic_cookie;
+	iov[0].iov_len	= len;
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+	msg.msg_flags = 0;
+	n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, len,
+				   rxperf_notify_end_reply_tx);
+	if (n >= 0)
+		return 0; /* Success */
+
+	if (n == -ENOMEM)
+		rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+					RXGEN_SS_MARSHAL, -ENOMEM,
+					rxperf_abort_oom);
+	return n;
+}
+
+/*
+ * Add a key to the security keyring.
+ */
+static int rxperf_add_key(struct key *keyring)
+{
+	key_ref_t kref;
+	int ret;
+
+	kref = key_create_or_update(make_key_ref(keyring, true),
+				    "rxrpc_s",
+				    __stringify(RX_PERF_SERVICE) ":2",
+				    secret,
+				    sizeof(secret),
+				    KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH
+				    | KEY_USR_VIEW,
+				    KEY_ALLOC_NOT_IN_QUOTA);
+
+	if (IS_ERR(kref)) {
+		pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref));
+		return PTR_ERR(kref);
+	}
+
+	ret = key_link(keyring, key_ref_to_ptr(kref));
+	if (ret < 0)
+		pr_err("Can't link rxperf server key: %d\n", ret);
+	key_ref_put(kref);
+	return ret;
+}
+
+/*
+ * Initialise the rxperf server.
+ */
+static int __init rxperf_init(void)
+{
+	struct key *keyring;
+	int ret = -ENOMEM;
+
+	pr_info("Server registering\n");
+
+	rxperf_workqueue = alloc_workqueue("rxperf", 0, 0);
+	if (!rxperf_workqueue)
+		goto error_workqueue;
+
+	keyring = keyring_alloc("rxperf_server",
+				GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+				KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+				KEY_POS_WRITE |
+				KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
+				KEY_USR_WRITE |
+				KEY_OTH_VIEW | KEY_OTH_READ | KEY_OTH_SEARCH,
+				KEY_ALLOC_NOT_IN_QUOTA,
+				NULL, NULL);
+	if (IS_ERR(keyring)) {
+		pr_err("Can't allocate rxperf server keyring: %ld\n",
+		       PTR_ERR(keyring));
+		goto error_keyring;
+	}
+	rxperf_sec_keyring = keyring;
+	ret = rxperf_add_key(keyring);
+	if (ret < 0)
+		goto error_key;
+
+	ret = rxperf_open_socket();
+	if (ret < 0)
+		goto error_socket;
+	return 0;
+
+error_socket:
+error_key:
+	key_put(rxperf_sec_keyring);
+error_keyring:
+	destroy_workqueue(rxperf_workqueue);
+	rcu_barrier();
+error_workqueue:
+	pr_err("Failed to register: %d\n", ret);
+	return ret;
+}
+late_initcall(rxperf_init); /* Must be called after net/ to create socket */
+
+static void __exit rxperf_exit(void)
+{
+	pr_info("Server unregistering.\n");
+
+	rxperf_close_socket();
+	key_put(rxperf_sec_keyring);
+	destroy_workqueue(rxperf_workqueue);
+	rcu_barrier();
+}
+module_exit(rxperf_exit);
+
-- 
cgit v1.2.3