aboutsummaryrefslogtreecommitdiff
path: root/net/dsa/dsa.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /net/dsa/dsa.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'net/dsa/dsa.c')
-rw-r--r--net/dsa/dsa.c1749
1 files changed, 1749 insertions, 0 deletions
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
new file mode 100644
index 000000000..e5f156940
--- /dev/null
+++ b/net/dsa/dsa.c
@@ -0,0 +1,1749 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DSA topology and switch handling
+ *
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
+ * Copyright (c) 2016 Andrew Lunn <andrew@lunn.ch>
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <net/sch_generic.h>
+
+#include "devlink.h"
+#include "dsa.h"
+#include "master.h"
+#include "netlink.h"
+#include "port.h"
+#include "slave.h"
+#include "switch.h"
+#include "tag.h"
+
+#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG
+
+static DEFINE_MUTEX(dsa2_mutex);
+LIST_HEAD(dsa_tree_list);
+
+static struct workqueue_struct *dsa_owq;
+
+/* Track the bridges with forwarding offload enabled */
+static unsigned long dsa_fwd_offloading_bridges;
+
+bool dsa_schedule_work(struct work_struct *work)
+{
+ return queue_work(dsa_owq, work);
+}
+
+void dsa_flush_workqueue(void)
+{
+ flush_workqueue(dsa_owq);
+}
+EXPORT_SYMBOL_GPL(dsa_flush_workqueue);
+
+/**
+ * dsa_lag_map() - Map LAG structure to a linear LAG array
+ * @dst: Tree in which to record the mapping.
+ * @lag: LAG structure that is to be mapped to the tree's array.
+ *
+ * dsa_lag_id/dsa_lag_by_id can then be used to translate between the
+ * two spaces. The size of the mapping space is determined by the
+ * driver by setting ds->num_lag_ids. It is perfectly legal to leave
+ * it unset if it is not needed, in which case these functions become
+ * no-ops.
+ */
+void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag)
+{
+ unsigned int id;
+
+ for (id = 1; id <= dst->lags_len; id++) {
+ if (!dsa_lag_by_id(dst, id)) {
+ dst->lags[id - 1] = lag;
+ lag->id = id;
+ return;
+ }
+ }
+
+ /* No IDs left, which is OK. Some drivers do not need it. The
+ * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
+ * returns an error for this device when joining the LAG. The
+ * driver can then return -EOPNOTSUPP back to DSA, which will
+ * fall back to a software LAG.
+ */
+}
+
+/**
+ * dsa_lag_unmap() - Remove a LAG ID mapping
+ * @dst: Tree in which the mapping is recorded.
+ * @lag: LAG structure that was mapped.
+ *
+ * As there may be multiple users of the mapping, it is only removed
+ * if there are no other references to it.
+ */
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag)
+{
+ unsigned int id;
+
+ dsa_lags_foreach_id(id, dst) {
+ if (dsa_lag_by_id(dst, id) == lag) {
+ dst->lags[id - 1] = NULL;
+ lag->id = 0;
+ break;
+ }
+ }
+}
+
+struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst,
+ const struct net_device *lag_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_lag_dev_get(dp) == lag_dev)
+ return dp->lag;
+
+ return NULL;
+}
+
+struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst,
+ const struct net_device *br)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_bridge_dev_get(dp) == br)
+ return dp->bridge;
+
+ return NULL;
+}
+
+static int dsa_bridge_num_find(const struct net_device *bridge_dev)
+{
+ struct dsa_switch_tree *dst;
+
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ struct dsa_bridge *bridge;
+
+ bridge = dsa_tree_bridge_find(dst, bridge_dev);
+ if (bridge)
+ return bridge->num;
+ }
+
+ return 0;
+}
+
+unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
+{
+ unsigned int bridge_num = dsa_bridge_num_find(bridge_dev);
+
+ /* Switches without FDB isolation support don't get unique
+ * bridge numbering
+ */
+ if (!max)
+ return 0;
+
+ if (!bridge_num) {
+ /* First port that requests FDB isolation or TX forwarding
+ * offload for this bridge
+ */
+ bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges,
+ DSA_MAX_NUM_OFFLOADING_BRIDGES,
+ 1);
+ if (bridge_num >= max)
+ return 0;
+
+ set_bit(bridge_num, &dsa_fwd_offloading_bridges);
+ }
+
+ return bridge_num;
+}
+
+void dsa_bridge_num_put(const struct net_device *bridge_dev,
+ unsigned int bridge_num)
+{
+ /* Since we refcount bridges, we know that when we call this function
+ * it is no longer in use, so we can just go ahead and remove it from
+ * the bit mask.
+ */
+ clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
+}
+
+struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
+{
+ struct dsa_switch_tree *dst;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ if (dst->index != tree_index)
+ continue;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->index != sw_index)
+ continue;
+
+ return dp->ds;
+ }
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_find);
+
+static struct dsa_switch_tree *dsa_tree_find(int index)
+{
+ struct dsa_switch_tree *dst;
+
+ list_for_each_entry(dst, &dsa_tree_list, list)
+ if (dst->index == index)
+ return dst;
+
+ return NULL;
+}
+
+static struct dsa_switch_tree *dsa_tree_alloc(int index)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return NULL;
+
+ dst->index = index;
+
+ INIT_LIST_HEAD(&dst->rtable);
+
+ INIT_LIST_HEAD(&dst->ports);
+
+ INIT_LIST_HEAD(&dst->list);
+ list_add_tail(&dst->list, &dsa_tree_list);
+
+ kref_init(&dst->refcount);
+
+ return dst;
+}
+
+static void dsa_tree_free(struct dsa_switch_tree *dst)
+{
+ if (dst->tag_ops)
+ dsa_tag_driver_put(dst->tag_ops);
+ list_del(&dst->list);
+ kfree(dst);
+}
+
+static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst)
+{
+ if (dst)
+ kref_get(&dst->refcount);
+
+ return dst;
+}
+
+static struct dsa_switch_tree *dsa_tree_touch(int index)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = dsa_tree_find(index);
+ if (dst)
+ return dsa_tree_get(dst);
+ else
+ return dsa_tree_alloc(index);
+}
+
+static void dsa_tree_release(struct kref *ref)
+{
+ struct dsa_switch_tree *dst;
+
+ dst = container_of(ref, struct dsa_switch_tree, refcount);
+
+ dsa_tree_free(dst);
+}
+
+static void dsa_tree_put(struct dsa_switch_tree *dst)
+{
+ if (dst)
+ kref_put(&dst->refcount, dsa_tree_release);
+}
+
+static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
+ struct device_node *dn)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->dn == dn)
+ return dp;
+
+ return NULL;
+}
+
+static struct dsa_link *dsa_link_touch(struct dsa_port *dp,
+ struct dsa_port *link_dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst;
+ struct dsa_link *dl;
+
+ dst = ds->dst;
+
+ list_for_each_entry(dl, &dst->rtable, list)
+ if (dl->dp == dp && dl->link_dp == link_dp)
+ return dl;
+
+ dl = kzalloc(sizeof(*dl), GFP_KERNEL);
+ if (!dl)
+ return NULL;
+
+ dl->dp = dp;
+ dl->link_dp = link_dp;
+
+ INIT_LIST_HEAD(&dl->list);
+ list_add_tail(&dl->list, &dst->rtable);
+
+ return dl;
+}
+
+static bool dsa_port_setup_routing_table(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ struct device_node *dn = dp->dn;
+ struct of_phandle_iterator it;
+ struct dsa_port *link_dp;
+ struct dsa_link *dl;
+ int err;
+
+ of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
+ link_dp = dsa_tree_find_port_by_node(dst, it.node);
+ if (!link_dp) {
+ of_node_put(it.node);
+ return false;
+ }
+
+ dl = dsa_link_touch(dp, link_dp);
+ if (!dl) {
+ of_node_put(it.node);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
+{
+ bool complete = true;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_dsa(dp)) {
+ complete = dsa_port_setup_routing_table(dp);
+ if (!complete)
+ break;
+ }
+ }
+
+ return complete;
+}
+
+static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_cpu(dp))
+ return dp;
+
+ return NULL;
+}
+
+struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst)
+{
+ struct device_node *ethernet;
+ struct net_device *master;
+ struct dsa_port *cpu_dp;
+
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0);
+ master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
+
+ return master;
+}
+
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
+static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp, *dp;
+
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!cpu_dp) {
+ pr_err("DSA: tree %d has no CPU port\n", dst->index);
+ return -EINVAL;
+ }
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->cpu_dp)
+ continue;
+
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
+ }
+
+ return 0;
+}
+
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp, *dp;
+
+ list_for_each_entry(cpu_dp, &dst->ports, list) {
+ if (!dsa_port_is_cpu(cpu_dp))
+ continue;
+
+ /* Prefer a local CPU port */
+ dsa_switch_for_each_port(dp, cpu_dp->ds) {
+ /* Prefer the first local CPU port found */
+ if (dp->cpu_dp)
+ continue;
+
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
+ }
+ }
+
+ return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = NULL;
+}
+
+static int dsa_port_setup(struct dsa_port *dp)
+{
+ bool dsa_port_link_registered = false;
+ struct dsa_switch *ds = dp->ds;
+ bool dsa_port_enabled = false;
+ int err = 0;
+
+ if (dp->setup)
+ return 0;
+
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ dsa_port_disable(dp);
+ break;
+ case DSA_PORT_TYPE_CPU:
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for CPU port %d\n",
+ dp->index);
+ }
+
+ err = dsa_port_enable(dp, NULL);
+ if (err)
+ break;
+ dsa_port_enabled = true;
+
+ break;
+ case DSA_PORT_TYPE_DSA:
+ if (dp->dn) {
+ err = dsa_shared_port_link_register_of(dp);
+ if (err)
+ break;
+ dsa_port_link_registered = true;
+ } else {
+ dev_warn(ds->dev,
+ "skipping link registration for DSA port %d\n",
+ dp->index);
+ }
+
+ err = dsa_port_enable(dp, NULL);
+ if (err)
+ break;
+ dsa_port_enabled = true;
+
+ break;
+ case DSA_PORT_TYPE_USER:
+ of_get_mac_address(dp->dn, dp->mac);
+ err = dsa_slave_create(dp);
+ break;
+ }
+
+ if (err && dsa_port_enabled)
+ dsa_port_disable(dp);
+ if (err && dsa_port_link_registered)
+ dsa_shared_port_link_unregister_of(dp);
+ if (err) {
+ dsa_port_devlink_teardown(dp);
+ return err;
+ }
+
+ dp->setup = true;
+
+ return 0;
+}
+
+static void dsa_port_teardown(struct dsa_port *dp)
+{
+ if (!dp->setup)
+ return;
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ break;
+ case DSA_PORT_TYPE_CPU:
+ dsa_port_disable(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_DSA:
+ dsa_port_disable(dp);
+ if (dp->dn)
+ dsa_shared_port_link_unregister_of(dp);
+ break;
+ case DSA_PORT_TYPE_USER:
+ if (dp->slave) {
+ dsa_slave_destroy(dp->slave);
+ dp->slave = NULL;
+ }
+ break;
+ }
+
+ dsa_port_devlink_teardown(dp);
+
+ dp->setup = false;
+}
+
+static int dsa_port_setup_as_unused(struct dsa_port *dp)
+{
+ dp->type = DSA_PORT_TYPE_UNUSED;
+ return dsa_port_setup(dp);
+}
+
+static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
+{
+ const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
+ struct dsa_switch_tree *dst = ds->dst;
+ int err;
+
+ if (tag_ops->proto == dst->default_proto)
+ goto connect;
+
+ rtnl_lock();
+ err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
+ rtnl_unlock();
+ if (err) {
+ dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
+ tag_ops->name, ERR_PTR(err));
+ return err;
+ }
+
+connect:
+ if (tag_ops->connect) {
+ err = tag_ops->connect(ds);
+ if (err)
+ return err;
+ }
+
+ if (ds->ops->connect_tag_protocol) {
+ err = ds->ops->connect_tag_protocol(ds, tag_ops->proto);
+ if (err) {
+ dev_err(ds->dev,
+ "Unable to connect to tag protocol \"%s\": %pe\n",
+ tag_ops->name, ERR_PTR(err));
+ goto disconnect;
+ }
+ }
+
+ return 0;
+
+disconnect:
+ if (tag_ops->disconnect)
+ tag_ops->disconnect(ds);
+
+ return err;
+}
+
+static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds)
+{
+ const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
+
+ if (tag_ops->disconnect)
+ tag_ops->disconnect(ds);
+}
+
+static int dsa_switch_setup(struct dsa_switch *ds)
+{
+ struct device_node *dn;
+ int err;
+
+ if (ds->setup)
+ return 0;
+
+ /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
+ * driver and before ops->setup() has run, since the switch drivers and
+ * the slave MDIO bus driver rely on these values for probing PHY
+ * devices or not
+ */
+ ds->phys_mii_mask |= dsa_user_ports(ds);
+
+ err = dsa_switch_devlink_alloc(ds);
+ if (err)
+ return err;
+
+ err = dsa_switch_register_notifier(ds);
+ if (err)
+ goto devlink_free;
+
+ ds->configure_vlan_while_not_filtering = true;
+
+ err = ds->ops->setup(ds);
+ if (err < 0)
+ goto unregister_notifier;
+
+ err = dsa_switch_setup_tag_protocol(ds);
+ if (err)
+ goto teardown;
+
+ if (!ds->slave_mii_bus && ds->ops->phy_read) {
+ ds->slave_mii_bus = mdiobus_alloc();
+ if (!ds->slave_mii_bus) {
+ err = -ENOMEM;
+ goto teardown;
+ }
+
+ dsa_slave_mii_bus_init(ds);
+
+ dn = of_get_child_by_name(ds->dev->of_node, "mdio");
+
+ err = of_mdiobus_register(ds->slave_mii_bus, dn);
+ of_node_put(dn);
+ if (err < 0)
+ goto free_slave_mii_bus;
+ }
+
+ dsa_switch_devlink_register(ds);
+
+ ds->setup = true;
+ return 0;
+
+free_slave_mii_bus:
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->slave_mii_bus);
+teardown:
+ if (ds->ops->teardown)
+ ds->ops->teardown(ds);
+unregister_notifier:
+ dsa_switch_unregister_notifier(ds);
+devlink_free:
+ dsa_switch_devlink_free(ds);
+ return err;
+}
+
+static void dsa_switch_teardown(struct dsa_switch *ds)
+{
+ if (!ds->setup)
+ return;
+
+ dsa_switch_devlink_unregister(ds);
+
+ if (ds->slave_mii_bus && ds->ops->phy_read) {
+ mdiobus_unregister(ds->slave_mii_bus);
+ mdiobus_free(ds->slave_mii_bus);
+ ds->slave_mii_bus = NULL;
+ }
+
+ dsa_switch_teardown_tag_protocol(ds);
+
+ if (ds->ops->teardown)
+ ds->ops->teardown(ds);
+
+ dsa_switch_unregister_notifier(ds);
+
+ dsa_switch_devlink_free(ds);
+
+ ds->setup = false;
+}
+
+/* First tear down the non-shared, then the shared ports. This ensures that
+ * all work items scheduled by our switchdev handlers for user ports have
+ * completed before we destroy the refcounting kept on the shared ports.
+ */
+static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp))
+ dsa_port_teardown(dp);
+
+ dsa_flush_workqueue();
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
+ dsa_port_teardown(dp);
+}
+
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
+}
+
+/* Bring shared ports up first, then non-shared ports */
+static int dsa_tree_setup_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+ int err = 0;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) {
+ err = dsa_port_setup(dp);
+ if (err)
+ goto teardown;
+ }
+ }
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) {
+ err = dsa_port_setup(dp);
+ if (err) {
+ err = dsa_port_setup_as_unused(dp);
+ if (err)
+ goto teardown;
+ }
+ }
+ }
+
+ return 0;
+
+teardown:
+ dsa_tree_teardown_ports(dst);
+
+ return err;
+}
+
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+ int err = 0;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ err = dsa_switch_setup(dp->ds);
+ if (err) {
+ dsa_tree_teardown_switches(dst);
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp;
+ int err = 0;
+
+ rtnl_lock();
+
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+ bool admin_up = (master->flags & IFF_UP) &&
+ !qdisc_tx_is_noop(master);
+
+ err = dsa_master_setup(master, cpu_dp);
+ if (err)
+ break;
+
+ /* Replay master state event */
+ dsa_tree_master_admin_state_change(dst, master, admin_up);
+ dsa_tree_master_oper_state_change(dst, master,
+ netif_oper_up(master));
+ }
+
+ rtnl_unlock();
+
+ return err;
+}
+
+static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp;
+
+ rtnl_lock();
+
+ dsa_tree_for_each_cpu_port(cpu_dp, dst) {
+ struct net_device *master = cpu_dp->master;
+
+ /* Synthesizing an "admin down" state is sufficient for
+ * the switches to get a notification if the master is
+ * currently up and running.
+ */
+ dsa_tree_master_admin_state_change(dst, master, false);
+
+ dsa_master_teardown(master);
+ }
+
+ rtnl_unlock();
+}
+
+static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
+{
+ unsigned int len = 0;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->num_lag_ids > len)
+ len = dp->ds->num_lag_ids;
+ }
+
+ if (!len)
+ return 0;
+
+ dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
+ if (!dst->lags)
+ return -ENOMEM;
+
+ dst->lags_len = len;
+ return 0;
+}
+
+static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
+{
+ kfree(dst->lags);
+}
+
+static int dsa_tree_setup(struct dsa_switch_tree *dst)
+{
+ bool complete;
+ int err;
+
+ if (dst->setup) {
+ pr_err("DSA: tree %d already setup! Disjoint trees?\n",
+ dst->index);
+ return -EEXIST;
+ }
+
+ complete = dsa_tree_setup_routing_table(dst);
+ if (!complete)
+ return 0;
+
+ err = dsa_tree_setup_cpu_ports(dst);
+ if (err)
+ return err;
+
+ err = dsa_tree_setup_switches(dst);
+ if (err)
+ goto teardown_cpu_ports;
+
+ err = dsa_tree_setup_ports(dst);
+ if (err)
+ goto teardown_switches;
+
+ err = dsa_tree_setup_master(dst);
+ if (err)
+ goto teardown_ports;
+
+ err = dsa_tree_setup_lags(dst);
+ if (err)
+ goto teardown_master;
+
+ dst->setup = true;
+
+ pr_info("DSA: tree %d setup\n", dst->index);
+
+ return 0;
+
+teardown_master:
+ dsa_tree_teardown_master(dst);
+teardown_ports:
+ dsa_tree_teardown_ports(dst);
+teardown_switches:
+ dsa_tree_teardown_switches(dst);
+teardown_cpu_ports:
+ dsa_tree_teardown_cpu_ports(dst);
+
+ return err;
+}
+
+static void dsa_tree_teardown(struct dsa_switch_tree *dst)
+{
+ struct dsa_link *dl, *next;
+
+ if (!dst->setup)
+ return;
+
+ dsa_tree_teardown_lags(dst);
+
+ dsa_tree_teardown_master(dst);
+
+ dsa_tree_teardown_ports(dst);
+
+ dsa_tree_teardown_switches(dst);
+
+ dsa_tree_teardown_cpu_ports(dst);
+
+ list_for_each_entry_safe(dl, next, &dst->rtable, list) {
+ list_del(&dl->list);
+ kfree(dl);
+ }
+
+ pr_info("DSA: tree %d torn down\n", dst->index);
+
+ dst->setup = false;
+}
+
+static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst,
+ const struct dsa_device_ops *tag_ops)
+{
+ const struct dsa_device_ops *old_tag_ops = dst->tag_ops;
+ struct dsa_notifier_tag_proto_info info;
+ int err;
+
+ dst->tag_ops = tag_ops;
+
+ /* Notify the switches from this tree about the connection
+ * to the new tagger
+ */
+ info.tag_ops = tag_ops;
+ err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info);
+ if (err && err != -EOPNOTSUPP)
+ goto out_disconnect;
+
+ /* Notify the old tagger about the disconnection from this tree */
+ info.tag_ops = old_tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
+
+ return 0;
+
+out_disconnect:
+ info.tag_ops = tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info);
+ dst->tag_ops = old_tag_ops;
+
+ return err;
+}
+
+/* Since the dsa/tagging sysfs device attribute is per master, the assumption
+ * is that all DSA switches within a tree share the same tagger, otherwise
+ * they would have formed disjoint trees (different "dsa,member" values).
+ */
+int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
+ const struct dsa_device_ops *tag_ops,
+ const struct dsa_device_ops *old_tag_ops)
+{
+ struct dsa_notifier_tag_proto_info info;
+ struct dsa_port *dp;
+ int err = -EBUSY;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ /* At the moment we don't allow changing the tag protocol under
+ * traffic. The rtnl_mutex also happens to serialize concurrent
+ * attempts to change the tagging protocol. If we ever lift the IFF_UP
+ * restriction, there needs to be another mutex which serializes this.
+ */
+ dsa_tree_for_each_user_port(dp, dst) {
+ if (dsa_port_to_master(dp)->flags & IFF_UP)
+ goto out_unlock;
+
+ if (dp->slave->flags & IFF_UP)
+ goto out_unlock;
+ }
+
+ /* Notify the tag protocol change */
+ info.tag_ops = tag_ops;
+ err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+ if (err)
+ goto out_unwind_tagger;
+
+ err = dsa_tree_bind_tag_proto(dst, tag_ops);
+ if (err)
+ goto out_unwind_tagger;
+
+ rtnl_unlock();
+
+ return 0;
+
+out_unwind_tagger:
+ info.tag_ops = old_tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static void dsa_tree_master_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master)
+{
+ struct dsa_notifier_master_state_info info;
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+
+ info.master = master;
+ info.operational = dsa_port_master_is_operational(cpu_dp);
+
+ dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info);
+}
+
+void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ /* Don't keep track of admin state on LAG DSA masters,
+ * but rather just of physical DSA masters
+ */
+ if (netif_is_lag_master(master))
+ return;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (up && cpu_dp->master_oper_up))
+ notify = true;
+
+ cpu_dp->master_admin_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
+void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ bool up)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ bool notify = false;
+
+ /* Don't keep track of oper state on LAG DSA masters,
+ * but rather just of physical DSA masters
+ */
+ if (netif_is_lag_master(master))
+ return;
+
+ if ((dsa_port_master_is_operational(cpu_dp)) !=
+ (cpu_dp->master_admin_up && up))
+ notify = true;
+
+ cpu_dp->master_oper_up = up;
+
+ if (notify)
+ dsa_tree_master_state_change(dst, master);
+}
+
+static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_port *dp;
+
+ dsa_switch_for_each_port(dp, ds)
+ if (dp->index == index)
+ return dp;
+
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ if (!dp)
+ return NULL;
+
+ dp->ds = ds;
+ dp->index = index;
+
+ mutex_init(&dp->addr_lists_lock);
+ mutex_init(&dp->vlans_lock);
+ INIT_LIST_HEAD(&dp->fdbs);
+ INIT_LIST_HEAD(&dp->mdbs);
+ INIT_LIST_HEAD(&dp->vlans);
+ INIT_LIST_HEAD(&dp->list);
+ list_add_tail(&dp->list, &dst->ports);
+
+ return dp;
+}
+
+static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
+{
+ dp->type = DSA_PORT_TYPE_USER;
+ dp->name = name;
+
+ return 0;
+}
+
+static int dsa_port_parse_dsa(struct dsa_port *dp)
+{
+ dp->type = DSA_PORT_TYPE_DSA;
+
+ return 0;
+}
+
+static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
+ struct net_device *master)
+{
+ enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE;
+ struct dsa_switch *mds, *ds = dp->ds;
+ unsigned int mdp_upstream;
+ struct dsa_port *mdp;
+
+ /* It is possible to stack DSA switches onto one another when that
+ * happens the switch driver may want to know if its tagging protocol
+ * is going to work in such a configuration.
+ */
+ if (dsa_slave_dev_check(master)) {
+ mdp = dsa_slave_to_port(master);
+ mds = mdp->ds;
+ mdp_upstream = dsa_upstream_port(mds, mdp->index);
+ tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream,
+ DSA_TAG_PROTO_NONE);
+ }
+
+ /* If the master device is not itself a DSA slave in a disjoint DSA
+ * tree, then return immediately.
+ */
+ return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
+}
+
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
+ const char *user_protocol)
+{
+ const struct dsa_device_ops *tag_ops = NULL;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ enum dsa_tag_protocol default_proto;
+
+ /* Find out which protocol the switch would prefer. */
+ default_proto = dsa_get_tag_protocol(dp, master);
+ if (dst->default_proto) {
+ if (dst->default_proto != default_proto) {
+ dev_err(ds->dev,
+ "A DSA switch tree can have only one tagging protocol\n");
+ return -EINVAL;
+ }
+ } else {
+ dst->default_proto = default_proto;
+ }
+
+ /* See if the user wants to override that preference. */
+ if (user_protocol) {
+ if (!ds->ops->change_tag_protocol) {
+ dev_err(ds->dev, "Tag protocol cannot be modified\n");
+ return -EINVAL;
+ }
+
+ tag_ops = dsa_tag_driver_get_by_name(user_protocol);
+ if (IS_ERR(tag_ops)) {
+ dev_warn(ds->dev,
+ "Failed to find a tagging driver for protocol %s, using default\n",
+ user_protocol);
+ tag_ops = NULL;
+ }
+ }
+
+ if (!tag_ops)
+ tag_ops = dsa_tag_driver_get_by_id(default_proto);
+
+ if (IS_ERR(tag_ops)) {
+ if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
+ return -EPROBE_DEFER;
+
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ return PTR_ERR(tag_ops);
+ }
+
+ if (dst->tag_ops) {
+ if (dst->tag_ops != tag_ops) {
+ dev_err(ds->dev,
+ "A DSA switch tree can have only one tagging protocol\n");
+
+ dsa_tag_driver_put(tag_ops);
+ return -EINVAL;
+ }
+
+ /* In the case of multiple CPU ports per switch, the tagging
+ * protocol is still reference-counted only per switch tree.
+ */
+ dsa_tag_driver_put(tag_ops);
+ } else {
+ dst->tag_ops = tag_ops;
+ }
+
+ dp->master = master;
+ dp->type = DSA_PORT_TYPE_CPU;
+ dsa_port_set_tag_protocol(dp, dst->tag_ops);
+ dp->dst = dst;
+
+ /* At this point, the tree may be configured to use a different
+ * tagger than the one chosen by the switch driver during
+ * .setup, in the case when a user selects a custom protocol
+ * through the DT.
+ *
+ * This is resolved by syncing the driver with the tree in
+ * dsa_switch_setup_tag_protocol once .setup has run and the
+ * driver is ready to accept calls to .change_tag_protocol. If
+ * the driver does not support the custom protocol at that
+ * point, the tree is wholly rejected, thereby ensuring that the
+ * tree and driver are always in agreement on the protocol to
+ * use.
+ */
+ return 0;
+}
+
+static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
+{
+ struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
+ const char *name = of_get_property(dn, "label", NULL);
+ bool link = of_property_read_bool(dn, "link");
+
+ dp->dn = dn;
+
+ if (ethernet) {
+ struct net_device *master;
+ const char *user_protocol;
+
+ master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
+ return dsa_port_parse_cpu(dp, master, user_protocol);
+ }
+
+ if (link)
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
+}
+
+static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
+ struct device_node *dn)
+{
+ struct device_node *ports, *port;
+ struct dsa_port *dp;
+ int err = 0;
+ u32 reg;
+
+ ports = of_get_child_by_name(dn, "ports");
+ if (!ports) {
+ /* The second possibility is "ethernet-ports" */
+ ports = of_get_child_by_name(dn, "ethernet-ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return -EINVAL;
+ }
+ }
+
+ for_each_available_child_of_node(ports, port) {
+ err = of_property_read_u32(port, "reg", &reg);
+ if (err) {
+ of_node_put(port);
+ goto out_put_node;
+ }
+
+ if (reg >= ds->num_ports) {
+ dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n",
+ port, reg, ds->num_ports);
+ of_node_put(port);
+ err = -EINVAL;
+ goto out_put_node;
+ }
+
+ dp = dsa_to_port(ds, reg);
+
+ err = dsa_port_parse_of(dp, port);
+ if (err) {
+ of_node_put(port);
+ goto out_put_node;
+ }
+ }
+
+out_put_node:
+ of_node_put(ports);
+ return err;
+}
+
+static int dsa_switch_parse_member_of(struct dsa_switch *ds,
+ struct device_node *dn)
+{
+ u32 m[2] = { 0, 0 };
+ int sz;
+
+ /* Don't error out if this optional property isn't found */
+ sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
+ if (sz < 0 && sz != -EINVAL)
+ return sz;
+
+ ds->index = m[1];
+
+ ds->dst = dsa_tree_touch(m[0]);
+ if (!ds->dst)
+ return -ENOMEM;
+
+ if (dsa_switch_find(ds->dst->index, ds->index)) {
+ dev_err(ds->dev,
+ "A DSA switch with index %d already exists in tree %d\n",
+ ds->index, ds->dst->index);
+ return -EEXIST;
+ }
+
+ if (ds->dst->last_switch < ds->index)
+ ds->dst->last_switch = ds->index;
+
+ return 0;
+}
+
+static int dsa_switch_touch_ports(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+ int port;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = dsa_port_touch(ds, port);
+ if (!dp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
+{
+ int err;
+
+ err = dsa_switch_parse_member_of(ds, dn);
+ if (err)
+ return err;
+
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
+
+ return dsa_switch_parse_ports_of(ds, dn);
+}
+
+static int dev_is_class(struct device *dev, void *class)
+{
+ if (dev->class != NULL && !strcmp(dev->class->name, class))
+ return 1;
+
+ return 0;
+}
+
+static struct device *dev_find_class(struct device *parent, char *class)
+{
+ if (dev_is_class(parent, class)) {
+ get_device(parent);
+ return parent;
+ }
+
+ return device_find_child(parent, class, dev_is_class);
+}
+
+static struct net_device *dsa_dev_to_net_device(struct device *dev)
+{
+ struct device *d;
+
+ d = dev_find_class(dev, "net");
+ if (d != NULL) {
+ struct net_device *nd;
+
+ nd = to_net_dev(d);
+ dev_hold(nd);
+ put_device(d);
+
+ return nd;
+ }
+
+ return NULL;
+}
+
+static int dsa_port_parse(struct dsa_port *dp, const char *name,
+ struct device *dev)
+{
+ if (!strcmp(name, "cpu")) {
+ struct net_device *master;
+
+ master = dsa_dev_to_net_device(dev);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ dev_put(master);
+
+ return dsa_port_parse_cpu(dp, master, NULL);
+ }
+
+ if (!strcmp(name, "dsa"))
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
+}
+
+static int dsa_switch_parse_ports(struct dsa_switch *ds,
+ struct dsa_chip_data *cd)
+{
+ bool valid_name_found = false;
+ struct dsa_port *dp;
+ struct device *dev;
+ const char *name;
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < DSA_MAX_PORTS; i++) {
+ name = cd->port_names[i];
+ dev = cd->netdev[i];
+ dp = dsa_to_port(ds, i);
+
+ if (!name)
+ continue;
+
+ err = dsa_port_parse(dp, name, dev);
+ if (err)
+ return err;
+
+ valid_name_found = true;
+ }
+
+ if (!valid_name_found && i == DSA_MAX_PORTS)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
+{
+ int err;
+
+ ds->cd = cd;
+
+ /* We don't support interconnected switches nor multiple trees via
+ * platform data, so this is the unique switch of the tree.
+ */
+ ds->index = 0;
+ ds->dst = dsa_tree_touch(0);
+ if (!ds->dst)
+ return -ENOMEM;
+
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
+
+ return dsa_switch_parse_ports(ds, cd);
+}
+
+static void dsa_switch_release_ports(struct dsa_switch *ds)
+{
+ struct dsa_port *dp, *next;
+
+ dsa_switch_for_each_port_safe(dp, next, ds) {
+ WARN_ON(!list_empty(&dp->fdbs));
+ WARN_ON(!list_empty(&dp->mdbs));
+ WARN_ON(!list_empty(&dp->vlans));
+ list_del(&dp->list);
+ kfree(dp);
+ }
+}
+
+static int dsa_switch_probe(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst;
+ struct dsa_chip_data *pdata;
+ struct device_node *np;
+ int err;
+
+ if (!ds->dev)
+ return -ENODEV;
+
+ pdata = ds->dev->platform_data;
+ np = ds->dev->of_node;
+
+ if (!ds->num_ports)
+ return -EINVAL;
+
+ if (np) {
+ err = dsa_switch_parse_of(ds, np);
+ if (err)
+ dsa_switch_release_ports(ds);
+ } else if (pdata) {
+ err = dsa_switch_parse(ds, pdata);
+ if (err)
+ dsa_switch_release_ports(ds);
+ } else {
+ err = -ENODEV;
+ }
+
+ if (err)
+ return err;
+
+ dst = ds->dst;
+ dsa_tree_get(dst);
+ err = dsa_tree_setup(dst);
+ if (err) {
+ dsa_switch_release_ports(ds);
+ dsa_tree_put(dst);
+ }
+
+ return err;
+}
+
+int dsa_register_switch(struct dsa_switch *ds)
+{
+ int err;
+
+ mutex_lock(&dsa2_mutex);
+ err = dsa_switch_probe(ds);
+ dsa_tree_put(ds->dst);
+ mutex_unlock(&dsa2_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(dsa_register_switch);
+
+static void dsa_switch_remove(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+
+ dsa_tree_teardown(dst);
+ dsa_switch_release_ports(ds);
+ dsa_tree_put(dst);
+}
+
+void dsa_unregister_switch(struct dsa_switch *ds)
+{
+ mutex_lock(&dsa2_mutex);
+ dsa_switch_remove(ds);
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_unregister_switch);
+
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
+{
+ struct net_device *master, *slave_dev;
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
+
+ if (!ds->setup)
+ goto out;
+
+ rtnl_lock();
+
+ dsa_switch_for_each_user_port(dp, ds) {
+ master = dsa_port_to_master(dp);
+ slave_dev = dp->slave;
+
+ netdev_upper_dev_unlink(master, slave_dev);
+ }
+
+ /* Disconnect from further netdevice notifiers on the master,
+ * since netdev_uses_dsa() will now return false.
+ */
+ dsa_switch_for_each_cpu_port(dp, ds)
+ dp->master->dsa_ptr = NULL;
+
+ rtnl_unlock();
+out:
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
+
+#ifdef CONFIG_PM_SLEEP
+static bool dsa_port_is_initialized(const struct dsa_port *dp)
+{
+ return dp->type == DSA_PORT_TYPE_USER && dp->slave;
+}
+
+int dsa_switch_suspend(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+ int ret = 0;
+
+ /* Suspend slave network devices */
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_initialized(dp))
+ continue;
+
+ ret = dsa_slave_suspend(dp->slave);
+ if (ret)
+ return ret;
+ }
+
+ if (ds->ops->suspend)
+ ret = ds->ops->suspend(ds);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
+
+int dsa_switch_resume(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+ int ret = 0;
+
+ if (ds->ops->resume)
+ ret = ds->ops->resume(ds);
+
+ if (ret)
+ return ret;
+
+ /* Resume slave network devices */
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_initialized(dp))
+ continue;
+
+ ret = dsa_slave_resume(dp->slave);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
+#endif
+
+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
+{
+ if (!netdev || !dsa_slave_dev_check(netdev))
+ return ERR_PTR(-ENODEV);
+
+ return dsa_slave_to_port(netdev);
+}
+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
+
+bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b)
+{
+ if (a->type != b->type)
+ return false;
+
+ switch (a->type) {
+ case DSA_DB_PORT:
+ return a->dp == b->dp;
+ case DSA_DB_LAG:
+ return a->lag.dev == b->lag.dev;
+ case DSA_DB_BRIDGE:
+ return a->bridge.num == b->bridge.num;
+ default:
+ WARN_ON(1);
+ return false;
+ }
+}
+
+bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+
+ lockdep_assert_held(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->fdbs, list) {
+ if (!ether_addr_equal(a->addr, addr) || a->vid != vid)
+ continue;
+
+ if (a->db.type == db.type && !dsa_db_equal(&a->db, &db))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(dsa_fdb_present_in_other_db);
+
+bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct dsa_db db)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+
+ lockdep_assert_held(&dp->addr_lists_lock);
+
+ list_for_each_entry(a, &dp->mdbs, list) {
+ if (!ether_addr_equal(a->addr, mdb->addr) || a->vid != mdb->vid)
+ continue;
+
+ if (a->db.type == db.type && !dsa_db_equal(&a->db, &db))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db);
+
+static int __init dsa_init_module(void)
+{
+ int rc;
+
+ dsa_owq = alloc_ordered_workqueue("dsa_ordered",
+ WQ_MEM_RECLAIM);
+ if (!dsa_owq)
+ return -ENOMEM;
+
+ rc = dsa_slave_register_notifier();
+ if (rc)
+ goto register_notifier_fail;
+
+ dev_add_pack(&dsa_pack_type);
+
+ rc = rtnl_link_register(&dsa_link_ops);
+ if (rc)
+ goto netlink_register_fail;
+
+ return 0;
+
+netlink_register_fail:
+ dsa_slave_unregister_notifier();
+ dev_remove_pack(&dsa_pack_type);
+register_notifier_fail:
+ destroy_workqueue(dsa_owq);
+
+ return rc;
+}
+module_init(dsa_init_module);
+
+static void __exit dsa_cleanup_module(void)
+{
+ rtnl_link_unregister(&dsa_link_ops);
+
+ dsa_slave_unregister_notifier();
+ dev_remove_pack(&dsa_pack_type);
+ destroy_workqueue(dsa_owq);
+}
+module_exit(dsa_cleanup_module);
+
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>");
+MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:dsa");