aboutsummaryrefslogtreecommitdiff
path: root/net/core/dev_ioctl.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /net/core/dev_ioctl.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to '')
-rw-r--r--net/core/dev_ioctl.c619
1 files changed, 619 insertions, 0 deletions
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
new file mode 100644
index 000000000..5cdbfbf9a
--- /dev/null
+++ b/net/core/dev_ioctl.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kmod.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/net_tstamp.h>
+#include <linux/wireless.h>
+#include <linux/if_bridge.h>
+#include <net/dsa.h>
+#include <net/wext.h>
+
+#include "dev.h"
+
+/*
+ * Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ * We need this ioctl for efficient implementation of the
+ * if_indextoname() function required by the IPv6 API. Without
+ * it, we would have to search all the interfaces to find a
+ * match. --pb
+ */
+
+static int dev_ifname(struct net *net, struct ifreq *ifr)
+{
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+ return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
+}
+
+/*
+ * Perform a SIOCGIFCONF call. This structure will change
+ * size eventually, and there is nothing I can do about it.
+ * Thus we will need a 'compatibility mode'.
+ */
+int dev_ifconf(struct net *net, struct ifconf __user *uifc)
+{
+ struct net_device *dev;
+ void __user *pos;
+ size_t size;
+ int len, total = 0, done;
+
+ /* both the ifconf and the ifreq structures are slightly different */
+ if (in_compat_syscall()) {
+ struct compat_ifconf ifc32;
+
+ if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf)))
+ return -EFAULT;
+
+ pos = compat_ptr(ifc32.ifcbuf);
+ len = ifc32.ifc_len;
+ size = sizeof(struct compat_ifreq);
+ } else {
+ struct ifconf ifc;
+
+ if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
+ return -EFAULT;
+
+ pos = ifc.ifc_buf;
+ len = ifc.ifc_len;
+ size = sizeof(struct ifreq);
+ }
+
+ /* Loop over the interfaces, and write an info block for each. */
+ rtnl_lock();
+ for_each_netdev(net, dev) {
+ if (!pos)
+ done = inet_gifconf(dev, NULL, 0, size);
+ else
+ done = inet_gifconf(dev, pos + total,
+ len - total, size);
+ if (done < 0) {
+ rtnl_unlock();
+ return -EFAULT;
+ }
+ total += done;
+ }
+ rtnl_unlock();
+
+ return put_user(total, &uifc->ifc_len);
+}
+
+static int dev_getifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct ifmap *ifmap = &ifr->ifr_map;
+
+ if (in_compat_syscall()) {
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap;
+
+ cifmap->mem_start = dev->mem_start;
+ cifmap->mem_end = dev->mem_end;
+ cifmap->base_addr = dev->base_addr;
+ cifmap->irq = dev->irq;
+ cifmap->dma = dev->dma;
+ cifmap->port = dev->if_port;
+
+ return 0;
+ }
+
+ ifmap->mem_start = dev->mem_start;
+ ifmap->mem_end = dev->mem_end;
+ ifmap->base_addr = dev->base_addr;
+ ifmap->irq = dev->irq;
+ ifmap->dma = dev->dma;
+ ifmap->port = dev->if_port;
+
+ return 0;
+}
+
+static int dev_setifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map;
+
+ if (!dev->netdev_ops->ndo_set_config)
+ return -EOPNOTSUPP;
+
+ if (in_compat_syscall()) {
+ struct ifmap ifmap = {
+ .mem_start = cifmap->mem_start,
+ .mem_end = cifmap->mem_end,
+ .base_addr = cifmap->base_addr,
+ .irq = cifmap->irq,
+ .dma = cifmap->dma,
+ .port = cifmap->port,
+ };
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifmap);
+ }
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map);
+}
+
+/*
+ * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
+ */
+static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+ int err;
+ struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
+
+ if (!dev)
+ return -ENODEV;
+
+ switch (cmd) {
+ case SIOCGIFFLAGS: /* Get interface flags */
+ ifr->ifr_flags = (short) dev_get_flags(dev);
+ return 0;
+
+ case SIOCGIFMETRIC: /* Get the metric on the interface
+ (currently unused) */
+ ifr->ifr_metric = 0;
+ return 0;
+
+ case SIOCGIFMTU: /* Get the MTU of a device */
+ ifr->ifr_mtu = dev->mtu;
+ return 0;
+
+ case SIOCGIFSLAVE:
+ err = -EINVAL;
+ break;
+
+ case SIOCGIFMAP:
+ return dev_getifmap(dev, ifr);
+
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ default:
+ /* dev_ioctl() should ensure this case
+ * is never reached
+ */
+ WARN_ON(1);
+ err = -ENOTTY;
+ break;
+
+ }
+ return err;
+}
+
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+ struct hwtstamp_config cfg;
+ enum hwtstamp_tx_types tx_type;
+ enum hwtstamp_rx_filters rx_filter;
+ int tx_type_valid = 0;
+ int rx_filter_valid = 0;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ if (cfg.flags & ~HWTSTAMP_FLAG_MASK)
+ return -EINVAL;
+
+ tx_type = cfg.tx_type;
+ rx_filter = cfg.rx_filter;
+
+ switch (tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
+ tx_type_valid = 1;
+ break;
+ case __HWTSTAMP_TX_CNT:
+ /* not a real value */
+ break;
+ }
+
+ switch (rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ rx_filter_valid = 1;
+ break;
+ case __HWTSTAMP_FILTER_CNT:
+ /* not a real value */
+ break;
+ }
+
+ if (!tx_type_valid || !rx_filter_valid)
+ return -ERANGE;
+
+ return 0;
+}
+
+static int dev_eth_ioctl(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ int err;
+
+ err = dsa_ndo_eth_ioctl(dev, ifr, cmd);
+ if (err == 0 || err != -EOPNOTSUPP)
+ return err;
+
+ if (ops->ndo_eth_ioctl) {
+ if (netif_device_present(dev))
+ err = ops->ndo_eth_ioctl(dev, ifr, cmd);
+ else
+ err = -ENODEV;
+ }
+
+ return err;
+}
+
+static int dev_siocbond(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocbond) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocbond(dev, ifr, cmd);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocdevprivate) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocdevprivate(dev, ifr, data, cmd);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocwandev) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocwandev(dev, ifs);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls, inside rtnl_lock()
+ */
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
+ unsigned int cmd)
+{
+ int err;
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ const struct net_device_ops *ops;
+ netdevice_tracker dev_tracker;
+
+ if (!dev)
+ return -ENODEV;
+
+ ops = dev->netdev_ops;
+
+ switch (cmd) {
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags, NULL);
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface
+ (currently unused) */
+ return -EOPNOTSUPP;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ return dev_set_mtu(dev, ifr->ifr_mtu);
+
+ case SIOCSIFHWADDR:
+ if (dev->addr_len > sizeof(struct sockaddr))
+ return -EINVAL;
+ return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL);
+
+ case SIOCSIFHWBROADCAST:
+ if (ifr->ifr_hwaddr.sa_family != dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+ min(sizeof(ifr->ifr_hwaddr.sa_data_min),
+ (size_t)dev->addr_len));
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return 0;
+
+ case SIOCSIFMAP:
+ return dev_setifmap(dev, ifr);
+
+ case SIOCADDMULTI:
+ if (!ops->ndo_set_rx_mode ||
+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
+
+ case SIOCDELMULTI:
+ if (!ops->ndo_set_rx_mode ||
+ ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+ return -EINVAL;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
+
+ case SIOCSIFTXQLEN:
+ if (ifr->ifr_qlen < 0)
+ return -EINVAL;
+ return dev_change_tx_queue_len(dev, ifr->ifr_qlen);
+
+ case SIOCSIFNAME:
+ ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+ return dev_change_name(dev, ifr->ifr_newname);
+
+ case SIOCWANDEV:
+ return dev_siocwandev(dev, &ifr->ifr_settings);
+
+ case SIOCBRADDIF:
+ case SIOCBRDELIF:
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ if (!netif_is_bridge_master(dev))
+ return -EOPNOTSUPP;
+ netdev_hold(dev, &dev_tracker, GFP_KERNEL);
+ rtnl_unlock();
+ err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+ netdev_put(dev, &dev_tracker);
+ rtnl_lock();
+ return err;
+
+ case SIOCSHWTSTAMP:
+ err = net_hwtstamp_validate(ifr);
+ if (err)
+ return err;
+ fallthrough;
+
+ /*
+ * Unknown or private ioctl
+ */
+ default:
+ if (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15)
+ return dev_siocdevprivate(dev, ifr, data, cmd);
+
+ if (cmd == SIOCGMIIPHY ||
+ cmd == SIOCGMIIREG ||
+ cmd == SIOCSMIIREG ||
+ cmd == SIOCSHWTSTAMP ||
+ cmd == SIOCGHWTSTAMP) {
+ err = dev_eth_ioctl(dev, ifr, cmd);
+ } else if (cmd == SIOCBONDENSLAVE ||
+ cmd == SIOCBONDRELEASE ||
+ cmd == SIOCBONDSETHWADDR ||
+ cmd == SIOCBONDSLAVEINFOQUERY ||
+ cmd == SIOCBONDINFOQUERY ||
+ cmd == SIOCBONDCHANGEACTIVE) {
+ err = dev_siocbond(dev, ifr, cmd);
+ } else
+ err = -EINVAL;
+
+ }
+ return err;
+}
+
+/**
+ * dev_load - load a network module
+ * @net: the applicable net namespace
+ * @name: name of interface
+ *
+ * If a network interface is not present and the process has suitable
+ * privileges this function loads the module. If module loading is not
+ * available in this kernel then it becomes a nop.
+ */
+
+void dev_load(struct net *net, const char *name)
+{
+ struct net_device *dev;
+ int no_module;
+
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, name);
+ rcu_read_unlock();
+
+ no_module = !dev;
+ if (no_module && capable(CAP_NET_ADMIN))
+ no_module = request_module("netdev-%s", name);
+ if (no_module && capable(CAP_SYS_MODULE))
+ request_module("%s", name);
+}
+EXPORT_SYMBOL(dev_load);
+
+/*
+ * This function handles all "interface"-type I/O control requests. The actual
+ * 'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ * dev_ioctl - network device ioctl
+ * @net: the applicable net namespace
+ * @cmd: command to issue
+ * @ifr: pointer to a struct ifreq in user space
+ * @need_copyout: whether or not copy_to_user() should be called
+ *
+ * Issue ioctl functions to devices. This is normally called by the
+ * user space syscall interfaces but can sometimes be useful for
+ * other purposes. The return value is the return from the syscall if
+ * positive or a negative errno code on error.
+ */
+
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
+ void __user *data, bool *need_copyout)
+{
+ int ret;
+ char *colon;
+
+ if (need_copyout)
+ *need_copyout = true;
+ if (cmd == SIOCGIFNAME)
+ return dev_ifname(net, ifr);
+
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+
+ colon = strchr(ifr->ifr_name, ':');
+ if (colon)
+ *colon = 0;
+
+ /*
+ * See which interface the caller is talking about.
+ */
+
+ switch (cmd) {
+ case SIOCGIFHWADDR:
+ dev_load(net, ifr->ifr_name);
+ ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
+ if (colon)
+ *colon = ':';
+ return ret;
+ /*
+ * These ioctl calls:
+ * - can be done by all.
+ * - atomic and do not require locking.
+ * - return a value
+ */
+ case SIOCGIFFLAGS:
+ case SIOCGIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCGIFSLAVE:
+ case SIOCGIFMAP:
+ case SIOCGIFINDEX:
+ case SIOCGIFTXQLEN:
+ dev_load(net, ifr->ifr_name);
+ rcu_read_lock();
+ ret = dev_ifsioc_locked(net, ifr, cmd);
+ rcu_read_unlock();
+ if (colon)
+ *colon = ':';
+ return ret;
+
+ case SIOCETHTOOL:
+ dev_load(net, ifr->ifr_name);
+ ret = dev_ethtool(net, ifr, data);
+ if (colon)
+ *colon = ':';
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - return a value
+ */
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCSIFNAME:
+ dev_load(net, ifr->ifr_name);
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ rtnl_lock();
+ ret = dev_ifsioc(net, ifr, data, cmd);
+ rtnl_unlock();
+ if (colon)
+ *colon = ':';
+ return ret;
+
+ /*
+ * These ioctl calls:
+ * - require superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
+ case SIOCSIFMAP:
+ case SIOCSIFTXQLEN:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ fallthrough;
+ /*
+ * These ioctl calls:
+ * - require local superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
+ case SIOCSIFFLAGS:
+ case SIOCSIFMETRIC:
+ case SIOCSIFMTU:
+ case SIOCSIFHWADDR:
+ case SIOCSIFSLAVE:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ case SIOCSIFHWBROADCAST:
+ case SIOCSMIIREG:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDCHANGEACTIVE:
+ case SIOCBRADDIF:
+ case SIOCBRDELIF:
+ case SIOCSHWTSTAMP:
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ fallthrough;
+ case SIOCBONDSLAVEINFOQUERY:
+ case SIOCBONDINFOQUERY:
+ dev_load(net, ifr->ifr_name);
+ rtnl_lock();
+ ret = dev_ifsioc(net, ifr, data, cmd);
+ rtnl_unlock();
+ if (need_copyout)
+ *need_copyout = false;
+ return ret;
+
+ case SIOCGIFMEM:
+ /* Get the per device memory space. We can add this but
+ * currently do not support it */
+ case SIOCSIFMEM:
+ /* Set the per device memory buffer space.
+ * Not applicable in our case */
+ case SIOCSIFLINK:
+ return -ENOTTY;
+
+ /*
+ * Unknown or private ioctl.
+ */
+ default:
+ if (cmd == SIOCWANDEV ||
+ cmd == SIOCGHWTSTAMP ||
+ (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15)) {
+ dev_load(net, ifr->ifr_name);
+ rtnl_lock();
+ ret = dev_ifsioc(net, ifr, data, cmd);
+ rtnl_unlock();
+ return ret;
+ }
+ return -ENOTTY;
+ }
+}