aboutsummaryrefslogtreecommitdiff
path: root/net/ipv6/anycast.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /net/ipv6/anycast.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'net/ipv6/anycast.c')
-rw-r--r--net/ipv6/anycast.c619
1 files changed, 619 insertions, 0 deletions
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
new file mode 100644
index 000000000..dacdea7fc
--- /dev/null
+++ b/net/ipv6/anycast.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Anycast support for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * David L Stevens (dlstevens@us.ibm.com)
+ *
+ * based heavily on net/ipv6/mcast.c
+ */
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/checksum.h>
+
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
+/*
+ * socket join an anycast group
+ */
+
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev;
+ struct ipv6_ac_socklist *pac;
+ struct net *net = sock_net(sk);
+ int ishost = !net->ipv6.devconf_all->forwarding;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
+ return -EINVAL;
+
+ pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
+ if (!pac)
+ return -ENOMEM;
+ pac->acl_next = NULL;
+ pac->acl_addr = *addr;
+
+ if (ifindex == 0) {
+ struct rt6_info *rt;
+
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
+ if (rt) {
+ dev = rt->dst.dev;
+ ip6_rt_put(rt);
+ } else if (ishost) {
+ err = -EADDRNOTAVAIL;
+ goto error;
+ } else {
+ /* router, no matching interface: just pick one */
+ dev = __dev_get_by_flags(net, IFF_UP,
+ IFF_UP | IFF_LOOPBACK);
+ }
+ }
+
+ if (!dev) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ idev = __in6_dev_get(dev);
+ if (!idev) {
+ if (ifindex)
+ err = -ENODEV;
+ else
+ err = -EADDRNOTAVAIL;
+ goto error;
+ }
+ /* reset ishost, now that we have a specific device */
+ ishost = !idev->cnf.forwarding;
+
+ pac->acl_ifindex = dev->ifindex;
+
+ /* XXX
+ * For hosts, allow link-local or matching prefix anycasts.
+ * This obviates the need for propagating anycast routes while
+ * still allowing some non-router anycast participation.
+ */
+ if (!ipv6_chk_prefix(addr, dev)) {
+ if (ishost)
+ err = -EADDRNOTAVAIL;
+ if (err)
+ goto error;
+ }
+
+ err = __ipv6_dev_ac_inc(idev, addr);
+ if (!err) {
+ pac->acl_next = np->ipv6_ac_list;
+ np->ipv6_ac_list = pac;
+ pac = NULL;
+ }
+
+error:
+ if (pac)
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ return err;
+}
+
+/*
+ * socket leave an anycast group
+ */
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev;
+ struct ipv6_ac_socklist *pac, *prev_pac;
+ struct net *net = sock_net(sk);
+
+ ASSERT_RTNL();
+
+ prev_pac = NULL;
+ for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
+ if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
+ ipv6_addr_equal(&pac->acl_addr, addr))
+ break;
+ prev_pac = pac;
+ }
+ if (!pac)
+ return -ENOENT;
+ if (prev_pac)
+ prev_pac->acl_next = pac->acl_next;
+ else
+ np->ipv6_ac_list = pac->acl_next;
+
+ dev = __dev_get_by_index(net, pac->acl_ifindex);
+ if (dev)
+ ipv6_dev_ac_dec(dev, &pac->acl_addr);
+
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ return 0;
+}
+
+void __ipv6_sock_ac_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev = NULL;
+ struct ipv6_ac_socklist *pac;
+ struct net *net = sock_net(sk);
+ int prev_index;
+
+ ASSERT_RTNL();
+ pac = np->ipv6_ac_list;
+ np->ipv6_ac_list = NULL;
+
+ prev_index = 0;
+ while (pac) {
+ struct ipv6_ac_socklist *next = pac->acl_next;
+
+ if (pac->acl_ifindex != prev_index) {
+ dev = __dev_get_by_index(net, pac->acl_ifindex);
+ prev_index = pac->acl_ifindex;
+ }
+ if (dev)
+ ipv6_dev_ac_dec(dev, &pac->acl_addr);
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ pac = next;
+ }
+}
+
+void ipv6_sock_ac_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (!np->ipv6_ac_list)
+ return;
+ rtnl_lock();
+ __ipv6_sock_ac_close(sk);
+ rtnl_unlock();
+}
+
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+ unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+ spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+ spin_lock(&acaddr_hash_lock);
+ hlist_del_init_rcu(&aca->aca_addr_lst);
+ spin_unlock(&acaddr_hash_lock);
+}
+
+static void aca_get(struct ifacaddr6 *aca)
+{
+ refcount_inc(&aca->aca_refcnt);
+}
+
+static void aca_free_rcu(struct rcu_head *h)
+{
+ struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+ fib6_info_release(aca->aca_rt);
+ kfree(aca);
+}
+
+static void aca_put(struct ifacaddr6 *ac)
+{
+ if (refcount_dec_and_test(&ac->aca_refcnt)) {
+ call_rcu(&ac->rcu, aca_free_rcu);
+ }
+}
+
+static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
+ const struct in6_addr *addr)
+{
+ struct ifacaddr6 *aca;
+
+ aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
+ if (!aca)
+ return NULL;
+
+ aca->aca_addr = *addr;
+ fib6_info_hold(f6i);
+ aca->aca_rt = f6i;
+ INIT_HLIST_NODE(&aca->aca_addr_lst);
+ aca->aca_users = 1;
+ /* aca_tstamp should be updated upon changes */
+ aca->aca_cstamp = aca->aca_tstamp = jiffies;
+ refcount_set(&aca->aca_refcnt, 1);
+
+ return aca;
+}
+
+/*
+ * device anycast group inc (add if not found)
+ */
+int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
+{
+ struct ifacaddr6 *aca;
+ struct fib6_info *f6i;
+ struct net *net;
+ int err;
+
+ ASSERT_RTNL();
+
+ write_lock_bh(&idev->lock);
+ if (idev->dead) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
+ aca->aca_users++;
+ err = 0;
+ goto out;
+ }
+ }
+
+ net = dev_net(idev->dev);
+ f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+ if (IS_ERR(f6i)) {
+ err = PTR_ERR(f6i);
+ goto out;
+ }
+ aca = aca_alloc(f6i, addr);
+ if (!aca) {
+ fib6_info_release(f6i);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ aca->aca_next = idev->ac_list;
+ idev->ac_list = aca;
+
+ /* Hold this for addrconf_join_solict() below before we unlock,
+ * it is already exposed via idev->ac_list.
+ */
+ aca_get(aca);
+ write_unlock_bh(&idev->lock);
+
+ ipv6_add_acaddr_hash(net, aca);
+
+ ip6_ins_rt(net, f6i);
+
+ addrconf_join_solict(idev->dev, &aca->aca_addr);
+
+ aca_put(aca);
+ return 0;
+out:
+ write_unlock_bh(&idev->lock);
+ return err;
+}
+
+/*
+ * device anycast group decrement
+ */
+int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
+{
+ struct ifacaddr6 *aca, *prev_aca;
+
+ ASSERT_RTNL();
+
+ write_lock_bh(&idev->lock);
+ prev_aca = NULL;
+ for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ if (ipv6_addr_equal(&aca->aca_addr, addr))
+ break;
+ prev_aca = aca;
+ }
+ if (!aca) {
+ write_unlock_bh(&idev->lock);
+ return -ENOENT;
+ }
+ if (--aca->aca_users > 0) {
+ write_unlock_bh(&idev->lock);
+ return 0;
+ }
+ if (prev_aca)
+ prev_aca->aca_next = aca->aca_next;
+ else
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
+
+ aca_put(aca);
+ return 0;
+}
+
+/* called with rtnl_lock() */
+static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev)
+ return -ENODEV;
+ return __ipv6_dev_ac_dec(idev, addr);
+}
+
+void ipv6_ac_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifacaddr6 *aca;
+
+ write_lock_bh(&idev->lock);
+ while ((aca = idev->ac_list) != NULL) {
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+
+ ipv6_del_acaddr_hash(aca);
+
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
+
+ aca_put(aca);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
+/*
+ * check if the interface has this anycast address
+ * called with rcu_read_lock()
+ */
+static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
+{
+ struct inet6_dev *idev;
+ struct ifacaddr6 *aca;
+
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (aca = idev->ac_list; aca; aca = aca->aca_next)
+ if (ipv6_addr_equal(&aca->aca_addr, addr))
+ break;
+ read_unlock_bh(&idev->lock);
+ return aca != NULL;
+ }
+ return false;
+}
+
+/*
+ * check if given interface (or any, if dev==0) has this anycast address
+ */
+bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
+ const struct in6_addr *addr)
+{
+ struct net_device *nh_dev;
+ struct ifacaddr6 *aca;
+ bool found = false;
+
+ rcu_read_lock();
+ if (dev)
+ found = ipv6_chk_acast_dev(dev, addr);
+ else {
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+
+ hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+ aca_addr_lst) {
+ nh_dev = fib6_info_nh_dev(aca->aca_rt);
+ if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+ continue;
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
+ found = true;
+ break;
+ }
+ }
+ }
+ rcu_read_unlock();
+ return found;
+}
+
+/* check if this anycast address is link-local on given interface or
+ * is global
+ */
+bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
+ const struct in6_addr *addr)
+{
+ return ipv6_chk_acast_addr(net,
+ (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
+ dev : NULL),
+ addr);
+}
+
+#ifdef CONFIG_PROC_FS
+struct ac6_iter_state {
+ struct seq_net_private p;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+};
+
+#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
+
+static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
+{
+ struct ifacaddr6 *im = NULL;
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+ struct net *net = seq_file_net(seq);
+
+ state->idev = NULL;
+ for_each_netdev_rcu(net, state->dev) {
+ struct inet6_dev *idev;
+ idev = __in6_dev_get(state->dev);
+ if (!idev)
+ continue;
+ read_lock_bh(&idev->lock);
+ im = idev->ac_list;
+ if (im) {
+ state->idev = idev;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ return im;
+}
+
+static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
+{
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ im = im->aca_next;
+ while (!im) {
+ if (likely(state->idev != NULL))
+ read_unlock_bh(&state->idev->lock);
+
+ state->dev = next_net_device_rcu(state->dev);
+ if (!state->dev) {
+ state->idev = NULL;
+ break;
+ }
+ state->idev = __in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ im = state->idev->ac_list;
+ }
+ return im;
+}
+
+static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct ifacaddr6 *im = ac6_get_first(seq);
+ if (im)
+ while (pos && (im = ac6_get_next(seq, im)) != NULL)
+ --pos;
+ return pos ? NULL : im;
+}
+
+static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ac6_get_idx(seq, *pos);
+}
+
+static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct ifacaddr6 *im = ac6_get_next(seq, v);
+
+ ++*pos;
+ return im;
+}
+
+static void ac6_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ state->idev = NULL;
+ }
+ rcu_read_unlock();
+}
+
+static int ac6_seq_show(struct seq_file *seq, void *v)
+{
+ struct ifacaddr6 *im = (struct ifacaddr6 *)v;
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
+ state->dev->ifindex, state->dev->name,
+ &im->aca_addr, im->aca_users);
+ return 0;
+}
+
+static const struct seq_operations ac6_seq_ops = {
+ .start = ac6_seq_start,
+ .next = ac6_seq_next,
+ .stop = ac6_seq_stop,
+ .show = ac6_seq_show,
+};
+
+int __net_init ac6_proc_init(struct net *net)
+{
+ if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops,
+ sizeof(struct ac6_iter_state)))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ac6_proc_exit(struct net *net)
+{
+ remove_proc_entry("anycast6", net->proc_net);
+}
+#endif
+
+/* Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}