aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/sfc/tc_counters.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /drivers/net/ethernet/sfc/tc_counters.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'drivers/net/ethernet/sfc/tc_counters.c')
-rw-r--r--drivers/net/ethernet/sfc/tc_counters.c503
1 files changed, 503 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c
new file mode 100644
index 000000000..d1a91d54c
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tc_counters.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "tc_counters.h"
+#include "mae_counter_format.h"
+#include "mae.h"
+#include "rx_common.h"
+
+/* Counter-management hashtables */
+
+static const struct rhashtable_params efx_tc_counter_id_ht_params = {
+ .key_len = offsetof(struct efx_tc_counter_index, linkage),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_tc_counter_index, linkage),
+};
+
+static const struct rhashtable_params efx_tc_counter_ht_params = {
+ .key_len = offsetof(struct efx_tc_counter, linkage),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_tc_counter, linkage),
+};
+
+static void efx_tc_counter_free(void *ptr, void *__unused)
+{
+ struct efx_tc_counter *cnt = ptr;
+
+ kfree(cnt);
+}
+
+static void efx_tc_counter_id_free(void *ptr, void *__unused)
+{
+ struct efx_tc_counter_index *ctr = ptr;
+
+ WARN_ON(refcount_read(&ctr->ref));
+ kfree(ctr);
+}
+
+int efx_tc_init_counters(struct efx_nic *efx)
+{
+ int rc;
+
+ rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
+ if (rc < 0)
+ goto fail_counter_id_ht;
+ rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
+ if (rc < 0)
+ goto fail_counter_ht;
+ return 0;
+fail_counter_ht:
+ rhashtable_destroy(&efx->tc->counter_id_ht);
+fail_counter_id_ht:
+ return rc;
+}
+
+/* Only call this in init failure teardown.
+ * Normal exit should fini instead as there may be entries in the table.
+ */
+void efx_tc_destroy_counters(struct efx_nic *efx)
+{
+ rhashtable_destroy(&efx->tc->counter_ht);
+ rhashtable_destroy(&efx->tc->counter_id_ht);
+}
+
+void efx_tc_fini_counters(struct efx_nic *efx)
+{
+ rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
+ rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
+}
+
+/* Counter allocation */
+
+static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
+ int type)
+{
+ struct efx_tc_counter *cnt;
+ int rc, rc2;
+
+ cnt = kzalloc(sizeof(*cnt), GFP_USER);
+ if (!cnt)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&cnt->lock);
+ cnt->touched = jiffies;
+ cnt->type = type;
+
+ rc = efx_mae_allocate_counter(efx, cnt);
+ if (rc)
+ goto fail1;
+ rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
+ efx_tc_counter_ht_params);
+ if (rc)
+ goto fail2;
+ return cnt;
+fail2:
+ /* If we get here, it implies that we couldn't insert into the table,
+ * which in turn probably means that the fw_id was already taken.
+ * In that case, it's unclear whether we really 'own' the fw_id; but
+ * the firmware seemed to think we did, so it's proper to free it.
+ */
+ rc2 = efx_mae_free_counter(efx, cnt);
+ if (rc2)
+ netif_warn(efx, hw, efx->net_dev,
+ "Failed to free MAE counter %u, rc %d\n",
+ cnt->fw_id, rc2);
+fail1:
+ kfree(cnt);
+ return ERR_PTR(rc > 0 ? -EIO : rc);
+}
+
+static void efx_tc_flower_release_counter(struct efx_nic *efx,
+ struct efx_tc_counter *cnt)
+{
+ int rc;
+
+ rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
+ efx_tc_counter_ht_params);
+ rc = efx_mae_free_counter(efx, cnt);
+ if (rc)
+ netif_warn(efx, hw, efx->net_dev,
+ "Failed to free MAE counter %u, rc %d\n",
+ cnt->fw_id, rc);
+ /* This doesn't protect counter updates coming in arbitrarily long
+ * after we deleted the counter. The RCU just ensures that we won't
+ * free the counter while another thread has a pointer to it.
+ * Ensuring we don't update the wrong counter if the ID gets re-used
+ * is handled by the generation count.
+ */
+ synchronize_rcu();
+ EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
+ kfree(cnt);
+}
+
+static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
+ struct efx_nic *efx, int type, u32 fw_id)
+{
+ struct efx_tc_counter key = {};
+
+ key.fw_id = fw_id;
+ key.type = type;
+
+ return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
+ efx_tc_counter_ht_params);
+}
+
+/* TC cookie to counter mapping */
+
+void efx_tc_flower_put_counter_index(struct efx_nic *efx,
+ struct efx_tc_counter_index *ctr)
+{
+ if (!refcount_dec_and_test(&ctr->ref))
+ return; /* still in use */
+ rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
+ efx_tc_counter_id_ht_params);
+ efx_tc_flower_release_counter(efx, ctr->cnt);
+ kfree(ctr);
+}
+
+struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
+ struct efx_nic *efx, unsigned long cookie,
+ enum efx_tc_counter_type type)
+{
+ struct efx_tc_counter_index *ctr, *old;
+ struct efx_tc_counter *cnt;
+
+ ctr = kzalloc(sizeof(*ctr), GFP_USER);
+ if (!ctr)
+ return ERR_PTR(-ENOMEM);
+ ctr->cookie = cookie;
+ old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
+ &ctr->linkage,
+ efx_tc_counter_id_ht_params);
+ if (old) {
+ /* don't need our new entry */
+ kfree(ctr);
+ if (!refcount_inc_not_zero(&old->ref))
+ return ERR_PTR(-EAGAIN);
+ /* existing entry found */
+ ctr = old;
+ } else {
+ cnt = efx_tc_flower_allocate_counter(efx, type);
+ if (IS_ERR(cnt)) {
+ rhashtable_remove_fast(&efx->tc->counter_id_ht,
+ &ctr->linkage,
+ efx_tc_counter_id_ht_params);
+ kfree(ctr);
+ return (void *)cnt; /* it's an ERR_PTR */
+ }
+ ctr->cnt = cnt;
+ refcount_set(&ctr->ref, 1);
+ }
+ return ctr;
+}
+
+struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
+ struct efx_nic *efx, unsigned long cookie)
+{
+ struct efx_tc_counter_index key = {};
+
+ key.cookie = cookie;
+ return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
+ efx_tc_counter_id_ht_params);
+}
+
+/* TC Channel. Counter updates are delivered on this channel's RXQ. */
+
+static void efx_tc_handle_no_channel(struct efx_nic *efx)
+{
+ netif_warn(efx, drv, efx->net_dev,
+ "MAE counters require MSI-X and 1 additional interrupt vector.\n");
+}
+
+static int efx_tc_probe_channel(struct efx_channel *channel)
+{
+ struct efx_rx_queue *rx_queue = &channel->rx_queue;
+
+ channel->irq_moderation_us = 0;
+ rx_queue->core_index = 0;
+
+ INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);
+
+ return 0;
+}
+
+static int efx_tc_start_channel(struct efx_channel *channel)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+ struct efx_nic *efx = channel->efx;
+
+ return efx_mae_start_counters(efx, rx_queue);
+}
+
+static void efx_tc_stop_channel(struct efx_channel *channel)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+ struct efx_nic *efx = channel->efx;
+ int rc;
+
+ rc = efx_mae_stop_counters(efx, rx_queue);
+ if (rc)
+ netif_warn(efx, drv, efx->net_dev,
+ "Failed to stop MAE counters streaming, rc=%d.\n",
+ rc);
+ rx_queue->grant_credits = false;
+ flush_work(&rx_queue->grant_work);
+}
+
+static void efx_tc_remove_channel(struct efx_channel *channel)
+{
+}
+
+static void efx_tc_get_channel_name(struct efx_channel *channel,
+ char *buf, size_t len)
+{
+ snprintf(buf, len, "%s-mae", channel->efx->name);
+}
+
+static void efx_tc_counter_update(struct efx_nic *efx,
+ enum efx_tc_counter_type counter_type,
+ u32 counter_idx, u64 packets, u64 bytes,
+ u32 mark)
+{
+ struct efx_tc_counter *cnt;
+
+ rcu_read_lock(); /* Protect against deletion of 'cnt' */
+ cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
+ if (!cnt) {
+ /* This can legitimately happen when a counter is removed,
+ * with updates for the counter still in-flight; however this
+ * should be an infrequent occurrence.
+ */
+ if (net_ratelimit())
+ netif_dbg(efx, drv, efx->net_dev,
+ "Got update for unwanted MAE counter %u type %u\n",
+ counter_idx, counter_type);
+ goto out;
+ }
+
+ spin_lock_bh(&cnt->lock);
+ if ((s32)mark - (s32)cnt->gen < 0) {
+ /* This counter update packet is from before the counter was
+ * allocated; thus it must be for a previous counter with
+ * the same ID that has since been freed, and it should be
+ * ignored.
+ */
+ } else {
+ /* Update latest seen generation count. This ensures that
+ * even a long-lived counter won't start getting ignored if
+ * the generation count wraps around, unless it somehow
+ * manages to go 1<<31 generations without an update.
+ */
+ cnt->gen = mark;
+ /* update counter values */
+ cnt->packets += packets;
+ cnt->bytes += bytes;
+ cnt->touched = jiffies;
+ }
+ spin_unlock_bh(&cnt->lock);
+out:
+ rcu_read_unlock();
+}
+
+static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
+{
+ u16 n_counters, i;
+
+ /* Header format:
+ * + | 0 | 1 | 2 | 3 |
+ * 0 |version | reserved |
+ * 4 | seq_index | n_counters |
+ */
+
+ n_counters = le16_to_cpu(*(const __le16 *)(data + 6));
+
+ /* Counter update entry format:
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
+ * | counter_idx | packet_count | byte_count |
+ */
+ for (i = 0; i < n_counters; i++) {
+ const void *entry = data + 8 + 16 * i;
+ u64 packet_count, byte_count;
+ u32 counter_idx;
+
+ counter_idx = le32_to_cpu(*(const __le32 *)entry);
+ packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
+ ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
+ byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
+ ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
+ efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
+ packet_count, byte_count, mark);
+ }
+}
+
+#define TCV2_HDR_PTR(pkt, field) \
+ ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7), \
+ (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
+#define TCV2_HDR_BYTE(pkt, field) \
+ ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
+ *TCV2_HDR_PTR(pkt, field))
+#define TCV2_HDR_WORD(pkt, field) \
+ ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
+ (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15), \
+ *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
+#define TCV2_PKT_PTR(pkt, poff, i, field) \
+ ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7), \
+ (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + poff + \
+ i * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
+
+/* Read a little-endian 48-bit field with 16-bit alignment */
+static u64 efx_tc_read48(const __le16 *field)
+{
+ u64 out = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ out |= (u64)le16_to_cpu(field[i]) << (i * 16);
+ return out;
+}
+
+static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
+ const u8 *data, u32 mark)
+{
+ u8 payload_offset, header_offset, ident;
+ enum efx_tc_counter_type type;
+ u16 n_counters, i;
+
+ ident = TCV2_HDR_BYTE(data, IDENTIFIER);
+ switch (ident) {
+ case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
+ type = EFX_TC_COUNTER_TYPE_AR;
+ break;
+ case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
+ type = EFX_TC_COUNTER_TYPE_CT;
+ break;
+ case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
+ type = EFX_TC_COUNTER_TYPE_OR;
+ break;
+ default:
+ if (net_ratelimit())
+ netif_err(efx, drv, efx->net_dev,
+ "ignored v2 MAE counter packet (bad identifier %u"
+ "), counters may be inaccurate\n", ident);
+ return EFX_TC_COUNTER_TYPE_MAX;
+ }
+ header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
+ /* mae_counter_format.h implies that this offset is fixed, since it
+ * carries on with SOP-based LBNs for the fields in this header
+ */
+ if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
+ if (net_ratelimit())
+ netif_err(efx, drv, efx->net_dev,
+ "choked on v2 MAE counter packet (bad header_offset %u"
+ "), counters may be inaccurate\n", header_offset);
+ return EFX_TC_COUNTER_TYPE_MAX;
+ }
+ payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
+ n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));
+
+ for (i = 0; i < n_counters; i++) {
+ const void *counter_idx_p, *packet_count_p, *byte_count_p;
+ u64 packet_count, byte_count;
+ u32 counter_idx;
+
+ /* 24-bit field with 32-bit alignment */
+ counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
+ BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
+ BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
+ counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
+ /* 48-bit field with 16-bit alignment */
+ packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
+ BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
+ BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
+ packet_count = efx_tc_read48((const __le16 *)packet_count_p);
+ /* 48-bit field with 16-bit alignment */
+ byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
+ BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
+ BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
+ byte_count = efx_tc_read48((const __le16 *)byte_count_p);
+
+ if (type == EFX_TC_COUNTER_TYPE_CT) {
+ /* CT counters are 1-bit saturating counters to update
+ * the lastuse time in CT stats. A received CT counter
+ * should have packet counter to 0 and only LSB bit on
+ * in byte counter.
+ */
+ if (packet_count || byte_count != 1)
+ netdev_warn_once(efx->net_dev,
+ "CT counter with inconsistent state (%llu, %llu)\n",
+ packet_count, byte_count);
+ /* Do not increment the driver's byte counter */
+ byte_count = 0;
+ }
+
+ efx_tc_counter_update(efx, type, counter_idx, packet_count,
+ byte_count, mark);
+ }
+ return type;
+}
+
+/* We always swallow the packet, whether successful or not, since it's not
+ * a network packet and shouldn't ever be forwarded to the stack.
+ * @mark is the generation count for counter allocations.
+ */
+static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
+{
+ struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
+ struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
+ channel->rx_pkt_index);
+ const u8 *data = efx_rx_buf_va(rx_buf);
+ struct efx_nic *efx = rx_queue->efx;
+ enum efx_tc_counter_type type;
+ u8 version;
+
+ /* version is always first byte of packet */
+ version = *data;
+ switch (version) {
+ case 1:
+ type = EFX_TC_COUNTER_TYPE_AR;
+ efx_tc_rx_version_1(efx, data, mark);
+ break;
+ case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
+ type = efx_tc_rx_version_2(efx, data, mark);
+ break;
+ default:
+ if (net_ratelimit())
+ netif_err(efx, drv, efx->net_dev,
+ "choked on MAE counter packet (bad version %u"
+ "); counters may be inaccurate\n",
+ version);
+ goto out;
+ }
+
+ if (type < EFX_TC_COUNTER_TYPE_MAX) {
+ /* Update seen_gen unconditionally, to avoid a missed wakeup if
+ * we race with efx_mae_stop_counters().
+ */
+ efx->tc->seen_gen[type] = mark;
+ if (efx->tc->flush_counters &&
+ (s32)(efx->tc->flush_gen[type] - mark) <= 0)
+ wake_up(&efx->tc->flush_wq);
+ }
+out:
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ channel->rx_pkt_n_frags = 0;
+ return true;
+}
+
+const struct efx_channel_type efx_tc_channel_type = {
+ .handle_no_channel = efx_tc_handle_no_channel,
+ .pre_probe = efx_tc_probe_channel,
+ .start = efx_tc_start_channel,
+ .stop = efx_tc_stop_channel,
+ .post_remove = efx_tc_remove_channel,
+ .get_name = efx_tc_get_channel_name,
+ .receive_raw = efx_tc_rx,
+ .keep_eventq = true,
+};