From 5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 21 Feb 2023 18:24:12 -0800
Subject: Merge tag 'net-next-6.3' of
 git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from Jakub Kicinski:
 "Core:

   - Add dedicated kmem_cache for typical/small skb->head, avoid having
     to access struct page at kfree time, and improve memory use.

   - Introduce sysctl to set default RPS configuration for new netdevs.

   - Define Netlink protocol specification format which can be used to
     describe messages used by each family and auto-generate parsers.
     Add tools for generating kernel data structures and uAPI headers.

   - Expose all net/core sysctls inside netns.

   - Remove 4s sleep in netpoll if carrier is instantly detected on
     boot.

   - Add configurable limit of MDB entries per port, and port-vlan.

   - Continue populating drop reasons throughout the stack.

   - Retire a handful of legacy Qdiscs and classifiers.

  Protocols:

   - Support IPv4 big TCP (TSO frames larger than 64kB).

   - Add IP_LOCAL_PORT_RANGE socket option, to control local port range
     on socket by socket basis.

   - Track and report in procfs number of MPTCP sockets used.

   - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path
     manager.

   - IPv6: don't check net.ipv6.route.max_size and rely on garbage
     collection to free memory (similarly to IPv4).

   - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986).

   - ICMP: add per-rate limit counters.

   - Add support for user scanning requests in ieee802154.

   - Remove static WEP support.

   - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate
     reporting.

   - WiFi 7 EHT channel puncturing support (client & AP).

  BPF:

   - Add a rbtree data structure following the "next-gen data structure"
     precedent set by recently added linked list, that is, by using
     kfunc + kptr instead of adding a new BPF map type.

   - Expose XDP hints via kfuncs with initial support for RX hash and
     timestamp metadata.

   - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to
     better support decap on GRE tunnel devices not operating in collect
     metadata.

   - Improve x86 JIT's codegen for PROBE_MEM runtime error checks.

   - Remove the need for trace_printk_lock for bpf_trace_printk and
     bpf_trace_vprintk helpers.

   - Extend libbpf's bpf_tracing.h support for tracing arguments of
     kprobes/uprobes and syscall as a special case.

   - Significantly reduce the search time for module symbols by
     livepatch and BPF.

   - Enable cpumasks to be used as kptrs, which is useful for tracing
     programs tracking which tasks end up running on which CPUs in
     different time intervals.

   - Add support for BPF trampoline on s390x and riscv64.

   - Add capability to export the XDP features supported by the NIC.

   - Add __bpf_kfunc tag for marking kernel functions as kfuncs.

   - Add cgroup.memory=nobpf kernel parameter option to disable BPF
     memory accounting for container environments.

  Netfilter:

   - Remove the CLUSTERIP target. It has been marked as obsolete for
     years, and we still have WARN splats wrt races of the out-of-band
     /proc interface installed by this target.

   - Add 'destroy' commands to nf_tables. They are identical to the
     existing 'delete' commands, but do not return an error if the
     referenced object (set, chain, rule...) did not exist.

  Driver API:

   - Improve cpumask_local_spread() locality to help NICs set the right
     IRQ affinity on AMD platforms.

   - Separate C22 and C45 MDIO bus transactions more clearly.

   - Introduce new DCB table to control DSCP rewrite on egress.

   - Support configuration of Physical Layer Collision Avoidance (PLCA)
     Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of
     shared medium Ethernet.

   - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing
     preemption of low priority frames by high priority frames.

   - Add support for controlling MACSec offload using netlink SET.

   - Rework devlink instance refcounts to allow registration and
     de-registration under the instance lock. Split the code into
     multiple files, drop some of the unnecessarily granular locks and
     factor out common parts of netlink operation handling.

   - Add TX frame aggregation parameters (for USB drivers).

   - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning
     messages with notifications for debug.

   - Allow offloading of UDP NEW connections via act_ct.

   - Add support for per action HW stats in TC.

   - Support hardware miss to TC action (continue processing in SW from
     a specific point in the action chain).

   - Warn if old Wireless Extension user space interface is used with
     modern cfg80211/mac80211 drivers. Do not support Wireless
     Extensions for Wi-Fi 7 devices at all. Everyone should switch to
     using nl80211 interface instead.

   - Improve the CAN bit timing configuration. Use extack to return
     error messages directly to user space, update the SJW handling,
     including the definition of a new default value that will benefit
     CAN-FD controllers, by increasing their oscillator tolerance.

  New hardware / drivers:

   - Ethernet:
      - nVidia BlueField-3 support (control traffic driver)
      - Ethernet support for imx93 SoCs
      - Motorcomm yt8531 gigabit Ethernet PHY
      - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA)
      - Microchip LAN8841 PHY (incl. cable diagnostics and PTP)
      - Amlogic gxl MDIO mux

   - WiFi:
      - RealTek RTL8188EU (rtl8xxxu)
      - Qualcomm Wi-Fi 7 devices (ath12k)

   - CAN:
      - Renesas R-Car V4H

  Drivers:

   - Bluetooth:
      - Set Per Platform Antenna Gain (PPAG) for Intel controllers.

   - Ethernet NICs:
      - Intel (1G, igc):
         - support TSN / Qbv / packet scheduling features of i226 model
      - Intel (100G, ice):
         - use GNSS subsystem instead of TTY
         - multi-buffer XDP support
         - extend support for GPIO pins to E823 devices
      - nVidia/Mellanox:
         - update the shared buffer configuration on PFC commands
         - implement PTP adjphase function for HW offset control
         - TC support for Geneve and GRE with VF tunnel offload
         - more efficient crypto key management method
         - multi-port eswitch support
      - Netronome/Corigine:
         - add DCB IEEE support
         - support IPsec offloading for NFP3800
      - Freescale/NXP (enetc):
         - support XDP_REDIRECT for XDP non-linear buffers
         - improve reconfig, avoid link flap and waiting for idle
         - support MAC Merge layer
      - Other NICs:
         - sfc/ef100: add basic devlink support for ef100
         - ionic: rx_push mode operation (writing descriptors via MMIO)
         - bnxt: use the auxiliary bus abstraction for RDMA
         - r8169: disable ASPM and reset bus in case of tx timeout
         - cpsw: support QSGMII mode for J721e CPSW9G
         - cpts: support pulse-per-second output
         - ngbe: add an mdio bus driver
         - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing
         - r8152: handle devices with FW with NCM support
         - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation
         - virtio-net: support multi buffer XDP
         - virtio/vsock: replace virtio_vsock_pkt with sk_buff
         - tsnep: XDP support

   - Ethernet high-speed switches:
      - nVidia/Mellanox (mlxsw):
         - add support for latency TLV (in FW control messages)
      - Microchip (sparx5):
         - separate explicit and implicit traffic forwarding rules, make
           the implicit rules always active
         - add support for egress DSCP rewrite
         - IS0 VCAP support (Ingress Classification)
         - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS
           etc.)
         - ES2 VCAP support (Egress Access Control)
         - support for Per-Stream Filtering and Policing (802.1Q,
           8.6.5.1)

   - Ethernet embedded switches:
      - Marvell (mv88e6xxx):
         - add MAB (port auth) offload support
         - enable PTP receive for mv88e6390
      - NXP (ocelot):
         - support MAC Merge layer
         - support for the the vsc7512 internal copper phys
      - Microchip:
         - lan9303: convert to PHYLINK
         - lan966x: support TC flower filter statistics
         - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x
         - lan937x: support Credit Based Shaper configuration
         - ksz9477: support Energy Efficient Ethernet
      - other:
         - qca8k: convert to regmap read/write API, use bulk operations
         - rswitch: Improve TX timestamp accuracy

   - Intel WiFi (iwlwifi):
      - EHT (Wi-Fi 7) rate reporting
      - STEP equalizer support: transfer some STEP (connection to radio
        on platforms with integrated wifi) related parameters from the
        BIOS to the firmware.

   - Qualcomm 802.11ax WiFi (ath11k):
      - IPQ5018 support
      - Fine Timing Measurement (FTM) responder role support
      - channel 177 support

   - MediaTek WiFi (mt76):
      - per-PHY LED support
      - mt7996: EHT (Wi-Fi 7) support
      - Wireless Ethernet Dispatch (WED) reset support
      - switch to using page pool allocator

   - RealTek WiFi (rtw89):
      - support new version of Bluetooth co-existance

   - Mobile:
      - rmnet: support TX aggregation"

* tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits)
  page_pool: add a comment explaining the fragment counter usage
  net: ethtool: fix __ethtool_dev_mm_supported() implementation
  ethtool: pse-pd: Fix double word in comments
  xsk: add linux/vmalloc.h to xsk.c
  sefltests: netdevsim: wait for devlink instance after netns removal
  selftest: fib_tests: Always cleanup before exit
  net/mlx5e: Align IPsec ASO result memory to be as required by hardware
  net/mlx5e: TC, Set CT miss to the specific ct action instance
  net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG
  net/mlx5: Refactor tc miss handling to a single function
  net/mlx5: Kconfig: Make tc offload depend on tc skb extension
  net/sched: flower: Support hardware miss to tc action
  net/sched: flower: Move filter handle initialization earlier
  net/sched: cls_api: Support hardware miss to tc action
  net/sched: Rename user cookie and act cookie
  sfc: fix builds without CONFIG_RTC_LIB
  sfc: clean up some inconsistent indentings
  net/mlx4_en: Introduce flexible array to silence overflow warning
  net: lan966x: Fix possible deadlock inside PTP
  net/ulp: Remove redundant ->clone() test in inet_clone_ulp().
  ...
---
 drivers/cpufreq/armada-37xx-cpufreq.c | 564 ++++++++++++++++++++++++++++++++++
 1 file changed, 564 insertions(+)
 create mode 100644 drivers/cpufreq/armada-37xx-cpufreq.c

(limited to 'drivers/cpufreq/armada-37xx-cpufreq.c')

diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
new file mode 100644
index 000000000..b74289a95
--- /dev/null
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * CPU frequency scaling support for Armada 37xx platform.
+ *
+ * Copyright (C) 2017 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include "cpufreq-dt.h"
+
+/* Clk register set */
+#define ARMADA_37XX_CLK_TBG_SEL		0
+#define ARMADA_37XX_CLK_TBG_SEL_CPU_OFF	22
+
+/* Power management in North Bridge register set */
+#define ARMADA_37XX_NB_L0L1	0x18
+#define ARMADA_37XX_NB_L2L3	0x1C
+#define  ARMADA_37XX_NB_TBG_DIV_OFF	13
+#define  ARMADA_37XX_NB_TBG_DIV_MASK	0x7
+#define  ARMADA_37XX_NB_CLK_SEL_OFF	11
+#define  ARMADA_37XX_NB_CLK_SEL_MASK	0x1
+#define  ARMADA_37XX_NB_CLK_SEL_TBG	0x1
+#define  ARMADA_37XX_NB_TBG_SEL_OFF	9
+#define  ARMADA_37XX_NB_TBG_SEL_MASK	0x3
+#define  ARMADA_37XX_NB_VDD_SEL_OFF	6
+#define  ARMADA_37XX_NB_VDD_SEL_MASK	0x3
+#define  ARMADA_37XX_NB_CONFIG_SHIFT	16
+#define ARMADA_37XX_NB_DYN_MOD	0x24
+#define  ARMADA_37XX_NB_CLK_SEL_EN	BIT(26)
+#define  ARMADA_37XX_NB_TBG_EN		BIT(28)
+#define  ARMADA_37XX_NB_DIV_EN		BIT(29)
+#define  ARMADA_37XX_NB_VDD_EN		BIT(30)
+#define  ARMADA_37XX_NB_DFS_EN		BIT(31)
+#define ARMADA_37XX_NB_CPU_LOAD 0x30
+#define  ARMADA_37XX_NB_CPU_LOAD_MASK	0x3
+#define  ARMADA_37XX_DVFS_LOAD_0	0
+#define  ARMADA_37XX_DVFS_LOAD_1	1
+#define  ARMADA_37XX_DVFS_LOAD_2	2
+#define  ARMADA_37XX_DVFS_LOAD_3	3
+
+/* AVS register set */
+#define ARMADA_37XX_AVS_CTL0		0x0
+#define	 ARMADA_37XX_AVS_ENABLE		BIT(30)
+#define	 ARMADA_37XX_AVS_HIGH_VDD_LIMIT	16
+#define	 ARMADA_37XX_AVS_LOW_VDD_LIMIT	22
+#define	 ARMADA_37XX_AVS_VDD_MASK	0x3F
+#define ARMADA_37XX_AVS_CTL2		0x8
+#define	 ARMADA_37XX_AVS_LOW_VDD_EN	BIT(6)
+#define ARMADA_37XX_AVS_VSET(x)	    (0x1C + 4 * (x))
+
+/*
+ * On Armada 37xx the Power management manages 4 level of CPU load,
+ * each level can be associated with a CPU clock source, a CPU
+ * divider, a VDD level, etc...
+ */
+#define LOAD_LEVEL_NR	4
+
+#define MIN_VOLT_MV 1000
+#define MIN_VOLT_MV_FOR_L1_1000MHZ 1108
+#define MIN_VOLT_MV_FOR_L1_1200MHZ 1155
+
+/*  AVS value for the corresponding voltage (in mV) */
+static int avs_map[] = {
+	747, 758, 770, 782, 793, 805, 817, 828, 840, 852, 863, 875, 887, 898,
+	910, 922, 933, 945, 957, 968, 980, 992, 1003, 1015, 1027, 1038, 1050,
+	1062, 1073, 1085, 1097, 1108, 1120, 1132, 1143, 1155, 1167, 1178, 1190,
+	1202, 1213, 1225, 1237, 1248, 1260, 1272, 1283, 1295, 1307, 1318, 1330,
+	1342
+};
+
+struct armada37xx_cpufreq_state {
+	struct platform_device *pdev;
+	struct device *cpu_dev;
+	struct regmap *regmap;
+	u32 nb_l0l1;
+	u32 nb_l2l3;
+	u32 nb_dyn_mod;
+	u32 nb_cpu_load;
+};
+
+static struct armada37xx_cpufreq_state *armada37xx_cpufreq_state;
+
+struct armada_37xx_dvfs {
+	u32 cpu_freq_max;
+	u8 divider[LOAD_LEVEL_NR];
+	u32 avs[LOAD_LEVEL_NR];
+};
+
+static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
+	/*
+	 * The cpufreq scaling for 1.2 GHz variant of the SOC is currently
+	 * unstable because we do not know how to configure it properly.
+	 */
+	/* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */
+	{.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} },
+	{.cpu_freq_max = 800*1000*1000,  .divider = {1, 2, 3, 4} },
+	{.cpu_freq_max = 600*1000*1000,  .divider = {2, 4, 5, 6} },
+};
+
+static struct armada_37xx_dvfs *armada_37xx_cpu_freq_info_get(u32 freq)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(armada_37xx_dvfs); i++) {
+		if (freq == armada_37xx_dvfs[i].cpu_freq_max)
+			return &armada_37xx_dvfs[i];
+	}
+
+	pr_err("Unsupported CPU frequency %d MHz\n", freq/1000000);
+	return NULL;
+}
+
+/*
+ * Setup the four level managed by the hardware. Once the four level
+ * will be configured then the DVFS will be enabled.
+ */
+static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
+						 struct regmap *clk_base, u8 *divider)
+{
+	u32 cpu_tbg_sel;
+	int load_lvl;
+
+	/* Determine to which TBG clock is CPU connected */
+	regmap_read(clk_base, ARMADA_37XX_CLK_TBG_SEL, &cpu_tbg_sel);
+	cpu_tbg_sel >>= ARMADA_37XX_CLK_TBG_SEL_CPU_OFF;
+	cpu_tbg_sel &= ARMADA_37XX_NB_TBG_SEL_MASK;
+
+	for (load_lvl = 0; load_lvl < LOAD_LEVEL_NR; load_lvl++) {
+		unsigned int reg, mask, val, offset = 0;
+
+		if (load_lvl <= ARMADA_37XX_DVFS_LOAD_1)
+			reg = ARMADA_37XX_NB_L0L1;
+		else
+			reg = ARMADA_37XX_NB_L2L3;
+
+		if (load_lvl == ARMADA_37XX_DVFS_LOAD_0 ||
+		    load_lvl == ARMADA_37XX_DVFS_LOAD_2)
+			offset += ARMADA_37XX_NB_CONFIG_SHIFT;
+
+		/* Set cpu clock source, for all the level we use TBG */
+		val = ARMADA_37XX_NB_CLK_SEL_TBG << ARMADA_37XX_NB_CLK_SEL_OFF;
+		mask = (ARMADA_37XX_NB_CLK_SEL_MASK
+			<< ARMADA_37XX_NB_CLK_SEL_OFF);
+
+		/* Set TBG index, for all levels we use the same TBG */
+		val = cpu_tbg_sel << ARMADA_37XX_NB_TBG_SEL_OFF;
+		mask = (ARMADA_37XX_NB_TBG_SEL_MASK
+			<< ARMADA_37XX_NB_TBG_SEL_OFF);
+
+		/*
+		 * Set cpu divider based on the pre-computed array in
+		 * order to have balanced step.
+		 */
+		val |= divider[load_lvl] << ARMADA_37XX_NB_TBG_DIV_OFF;
+		mask |= (ARMADA_37XX_NB_TBG_DIV_MASK
+			<< ARMADA_37XX_NB_TBG_DIV_OFF);
+
+		/* Set VDD divider which is actually the load level. */
+		val |= load_lvl << ARMADA_37XX_NB_VDD_SEL_OFF;
+		mask |= (ARMADA_37XX_NB_VDD_SEL_MASK
+			<< ARMADA_37XX_NB_VDD_SEL_OFF);
+
+		val <<= offset;
+		mask <<= offset;
+
+		regmap_update_bits(base, reg, mask, val);
+	}
+}
+
+/*
+ * Find out the armada 37x supported AVS value whose voltage value is
+ * the round-up closest to the target voltage value.
+ */
+static u32 armada_37xx_avs_val_match(int target_vm)
+{
+	u32 avs;
+
+	/* Find out the round-up closest supported voltage value */
+	for (avs = 0; avs < ARRAY_SIZE(avs_map); avs++)
+		if (avs_map[avs] >= target_vm)
+			break;
+
+	/*
+	 * If all supported voltages are smaller than target one,
+	 * choose the largest supported voltage
+	 */
+	if (avs == ARRAY_SIZE(avs_map))
+		avs = ARRAY_SIZE(avs_map) - 1;
+
+	return avs;
+}
+
+/*
+ * For Armada 37xx soc, L0(VSET0) VDD AVS value is set to SVC revision
+ * value or a default value when SVC is not supported.
+ * - L0 can be read out from the register of AVS_CTRL_0 and L0 voltage
+ *   can be got from the mapping table of avs_map.
+ * - L1 voltage should be about 100mv smaller than L0 voltage
+ * - L2 & L3 voltage should be about 150mv smaller than L0 voltage.
+ * This function calculates L1 & L2 & L3 AVS values dynamically based
+ * on L0 voltage and fill all AVS values to the AVS value table.
+ * When base CPU frequency is 1000 or 1200 MHz then there is additional
+ * minimal avs value for load L1.
+ */
+static void __init armada37xx_cpufreq_avs_configure(struct regmap *base,
+						struct armada_37xx_dvfs *dvfs)
+{
+	unsigned int target_vm;
+	int load_level = 0;
+	u32 l0_vdd_min;
+
+	if (base == NULL)
+		return;
+
+	/* Get L0 VDD min value */
+	regmap_read(base, ARMADA_37XX_AVS_CTL0, &l0_vdd_min);
+	l0_vdd_min = (l0_vdd_min >> ARMADA_37XX_AVS_LOW_VDD_LIMIT) &
+		ARMADA_37XX_AVS_VDD_MASK;
+	if (l0_vdd_min >= ARRAY_SIZE(avs_map))  {
+		pr_err("L0 VDD MIN %d is not correct.\n", l0_vdd_min);
+		return;
+	}
+	dvfs->avs[0] = l0_vdd_min;
+
+	if (avs_map[l0_vdd_min] <= MIN_VOLT_MV) {
+		/*
+		 * If L0 voltage is smaller than 1000mv, then all VDD sets
+		 * use L0 voltage;
+		 */
+		u32 avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV);
+
+		for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++)
+			dvfs->avs[load_level] = avs_min;
+
+		/*
+		 * Set the avs values for load L0 and L1 when base CPU frequency
+		 * is 1000/1200 MHz to its typical initial values according to
+		 * the Armada 3700 Hardware Specifications.
+		 */
+		if (dvfs->cpu_freq_max >= 1000*1000*1000) {
+			if (dvfs->cpu_freq_max >= 1200*1000*1000)
+				avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1200MHZ);
+			else
+				avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1000MHZ);
+			dvfs->avs[0] = dvfs->avs[1] = avs_min;
+		}
+
+		return;
+	}
+
+	/*
+	 * L1 voltage is equal to L0 voltage - 100mv and it must be
+	 * larger than 1000mv
+	 */
+
+	target_vm = avs_map[l0_vdd_min] - 100;
+	target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
+	dvfs->avs[1] = armada_37xx_avs_val_match(target_vm);
+
+	/*
+	 * L2 & L3 voltage is equal to L0 voltage - 150mv and it must
+	 * be larger than 1000mv
+	 */
+	target_vm = avs_map[l0_vdd_min] - 150;
+	target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
+	dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm);
+
+	/*
+	 * Fix the avs value for load L1 when base CPU frequency is 1000/1200 MHz,
+	 * otherwise the CPU gets stuck when switching from load L1 to load L0.
+	 * Also ensure that avs value for load L1 is not higher than for L0.
+	 */
+	if (dvfs->cpu_freq_max >= 1000*1000*1000) {
+		u32 avs_min_l1;
+
+		if (dvfs->cpu_freq_max >= 1200*1000*1000)
+			avs_min_l1 = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1200MHZ);
+		else
+			avs_min_l1 = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1000MHZ);
+
+		if (avs_min_l1 > dvfs->avs[0])
+			avs_min_l1 = dvfs->avs[0];
+
+		if (dvfs->avs[1] < avs_min_l1)
+			dvfs->avs[1] = avs_min_l1;
+	}
+}
+
+static void __init armada37xx_cpufreq_avs_setup(struct regmap *base,
+						struct armada_37xx_dvfs *dvfs)
+{
+	unsigned int avs_val = 0;
+	int load_level = 0;
+
+	if (base == NULL)
+		return;
+
+	/* Disable AVS before the configuration */
+	regmap_update_bits(base, ARMADA_37XX_AVS_CTL0,
+			   ARMADA_37XX_AVS_ENABLE, 0);
+
+
+	/* Enable low voltage mode */
+	regmap_update_bits(base, ARMADA_37XX_AVS_CTL2,
+			   ARMADA_37XX_AVS_LOW_VDD_EN,
+			   ARMADA_37XX_AVS_LOW_VDD_EN);
+
+
+	for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) {
+		avs_val = dvfs->avs[load_level];
+		regmap_update_bits(base, ARMADA_37XX_AVS_VSET(load_level-1),
+		    ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_HIGH_VDD_LIMIT |
+		    ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_LOW_VDD_LIMIT,
+		    avs_val << ARMADA_37XX_AVS_HIGH_VDD_LIMIT |
+		    avs_val << ARMADA_37XX_AVS_LOW_VDD_LIMIT);
+	}
+
+	/* Enable AVS after the configuration */
+	regmap_update_bits(base, ARMADA_37XX_AVS_CTL0,
+			   ARMADA_37XX_AVS_ENABLE,
+			   ARMADA_37XX_AVS_ENABLE);
+
+}
+
+static void armada37xx_cpufreq_disable_dvfs(struct regmap *base)
+{
+	unsigned int reg = ARMADA_37XX_NB_DYN_MOD,
+		mask = ARMADA_37XX_NB_DFS_EN;
+
+	regmap_update_bits(base, reg, mask, 0);
+}
+
+static void __init armada37xx_cpufreq_enable_dvfs(struct regmap *base)
+{
+	unsigned int val, reg = ARMADA_37XX_NB_CPU_LOAD,
+		mask = ARMADA_37XX_NB_CPU_LOAD_MASK;
+
+	/* Start with the highest load (0) */
+	val = ARMADA_37XX_DVFS_LOAD_0;
+	regmap_update_bits(base, reg, mask, val);
+
+	/* Now enable DVFS for the CPUs */
+	reg = ARMADA_37XX_NB_DYN_MOD;
+	mask =	ARMADA_37XX_NB_CLK_SEL_EN | ARMADA_37XX_NB_TBG_EN |
+		ARMADA_37XX_NB_DIV_EN | ARMADA_37XX_NB_VDD_EN |
+		ARMADA_37XX_NB_DFS_EN;
+
+	regmap_update_bits(base, reg, mask, mask);
+}
+
+static int armada37xx_cpufreq_suspend(struct cpufreq_policy *policy)
+{
+	struct armada37xx_cpufreq_state *state = armada37xx_cpufreq_state;
+
+	regmap_read(state->regmap, ARMADA_37XX_NB_L0L1, &state->nb_l0l1);
+	regmap_read(state->regmap, ARMADA_37XX_NB_L2L3, &state->nb_l2l3);
+	regmap_read(state->regmap, ARMADA_37XX_NB_CPU_LOAD,
+		    &state->nb_cpu_load);
+	regmap_read(state->regmap, ARMADA_37XX_NB_DYN_MOD, &state->nb_dyn_mod);
+
+	return 0;
+}
+
+static int armada37xx_cpufreq_resume(struct cpufreq_policy *policy)
+{
+	struct armada37xx_cpufreq_state *state = armada37xx_cpufreq_state;
+
+	/* Ensure DVFS is disabled otherwise the following registers are RO */
+	armada37xx_cpufreq_disable_dvfs(state->regmap);
+
+	regmap_write(state->regmap, ARMADA_37XX_NB_L0L1, state->nb_l0l1);
+	regmap_write(state->regmap, ARMADA_37XX_NB_L2L3, state->nb_l2l3);
+	regmap_write(state->regmap, ARMADA_37XX_NB_CPU_LOAD,
+		     state->nb_cpu_load);
+
+	/*
+	 * NB_DYN_MOD register is the one that actually enable back DVFS if it
+	 * was enabled before the suspend operation. This must be done last
+	 * otherwise other registers are not writable.
+	 */
+	regmap_write(state->regmap, ARMADA_37XX_NB_DYN_MOD, state->nb_dyn_mod);
+
+	return 0;
+}
+
+static int __init armada37xx_cpufreq_driver_init(void)
+{
+	struct cpufreq_dt_platform_data pdata;
+	struct armada_37xx_dvfs *dvfs;
+	struct platform_device *pdev;
+	unsigned long freq;
+	unsigned int base_frequency;
+	struct regmap *nb_clk_base, *nb_pm_base, *avs_base;
+	struct device *cpu_dev;
+	int load_lvl, ret;
+	struct clk *clk, *parent;
+
+	nb_clk_base =
+		syscon_regmap_lookup_by_compatible("marvell,armada-3700-periph-clock-nb");
+	if (IS_ERR(nb_clk_base))
+		return -ENODEV;
+
+	nb_pm_base =
+		syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm");
+
+	if (IS_ERR(nb_pm_base))
+		return -ENODEV;
+
+	avs_base =
+		syscon_regmap_lookup_by_compatible("marvell,armada-3700-avs");
+
+	/* if AVS is not present don't use it but still try to setup dvfs */
+	if (IS_ERR(avs_base)) {
+		pr_info("Syscon failed for Adapting Voltage Scaling: skip it\n");
+		avs_base = NULL;
+	}
+	/* Before doing any configuration on the DVFS first, disable it */
+	armada37xx_cpufreq_disable_dvfs(nb_pm_base);
+
+	/*
+	 * On CPU 0 register the operating points supported (which are
+	 * the nominal CPU frequency and full integer divisions of
+	 * it).
+	 */
+	cpu_dev = get_cpu_device(0);
+	if (!cpu_dev) {
+		dev_err(cpu_dev, "Cannot get CPU\n");
+		return -ENODEV;
+	}
+
+	clk = clk_get(cpu_dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(cpu_dev, "Cannot get clock for CPU0\n");
+		return PTR_ERR(clk);
+	}
+
+	parent = clk_get_parent(clk);
+	if (IS_ERR(parent)) {
+		dev_err(cpu_dev, "Cannot get parent clock for CPU0\n");
+		clk_put(clk);
+		return PTR_ERR(parent);
+	}
+
+	/* Get parent CPU frequency */
+	base_frequency =  clk_get_rate(parent);
+
+	if (!base_frequency) {
+		dev_err(cpu_dev, "Failed to get parent clock rate for CPU\n");
+		clk_put(clk);
+		return -EINVAL;
+	}
+
+	dvfs = armada_37xx_cpu_freq_info_get(base_frequency);
+	if (!dvfs) {
+		clk_put(clk);
+		return -EINVAL;
+	}
+
+	armada37xx_cpufreq_state = kmalloc(sizeof(*armada37xx_cpufreq_state),
+					   GFP_KERNEL);
+	if (!armada37xx_cpufreq_state) {
+		clk_put(clk);
+		return -ENOMEM;
+	}
+
+	armada37xx_cpufreq_state->regmap = nb_pm_base;
+
+	armada37xx_cpufreq_avs_configure(avs_base, dvfs);
+	armada37xx_cpufreq_avs_setup(avs_base, dvfs);
+
+	armada37xx_cpufreq_dvfs_setup(nb_pm_base, nb_clk_base, dvfs->divider);
+	clk_put(clk);
+
+	for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
+	     load_lvl++) {
+		unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000;
+		freq = base_frequency / dvfs->divider[load_lvl];
+		ret = dev_pm_opp_add(cpu_dev, freq, u_volt);
+		if (ret)
+			goto remove_opp;
+
+
+	}
+
+	/* Now that everything is setup, enable the DVFS at hardware level */
+	armada37xx_cpufreq_enable_dvfs(nb_pm_base);
+
+	memset(&pdata, 0, sizeof(pdata));
+	pdata.suspend = armada37xx_cpufreq_suspend;
+	pdata.resume = armada37xx_cpufreq_resume;
+
+	pdev = platform_device_register_data(NULL, "cpufreq-dt", -1, &pdata,
+					     sizeof(pdata));
+	ret = PTR_ERR_OR_ZERO(pdev);
+	if (ret)
+		goto disable_dvfs;
+
+	armada37xx_cpufreq_state->cpu_dev = cpu_dev;
+	armada37xx_cpufreq_state->pdev = pdev;
+	platform_set_drvdata(pdev, dvfs);
+	return 0;
+
+disable_dvfs:
+	armada37xx_cpufreq_disable_dvfs(nb_pm_base);
+remove_opp:
+	/* clean-up the already added opp before leaving */
+	while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) {
+		freq = base_frequency / dvfs->divider[load_lvl];
+		dev_pm_opp_remove(cpu_dev, freq);
+	}
+
+	kfree(armada37xx_cpufreq_state);
+
+	return ret;
+}
+/* late_initcall, to guarantee the driver is loaded after A37xx clock driver */
+late_initcall(armada37xx_cpufreq_driver_init);
+
+static void __exit armada37xx_cpufreq_driver_exit(void)
+{
+	struct platform_device *pdev = armada37xx_cpufreq_state->pdev;
+	struct armada_37xx_dvfs *dvfs = platform_get_drvdata(pdev);
+	unsigned long freq;
+	int load_lvl;
+
+	platform_device_unregister(pdev);
+
+	armada37xx_cpufreq_disable_dvfs(armada37xx_cpufreq_state->regmap);
+
+	for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) {
+		freq = dvfs->cpu_freq_max / dvfs->divider[load_lvl];
+		dev_pm_opp_remove(armada37xx_cpufreq_state->cpu_dev, freq);
+	}
+
+	kfree(armada37xx_cpufreq_state);
+}
+module_exit(armada37xx_cpufreq_driver_exit);
+
+static const struct of_device_id __maybe_unused armada37xx_cpufreq_of_match[] = {
+	{ .compatible = "marvell,armada-3700-nb-pm" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, armada37xx_cpufreq_of_match);
+
+MODULE_AUTHOR("Gregory CLEMENT <gregory.clement@free-electrons.com>");
+MODULE_DESCRIPTION("Armada 37xx cpufreq driver");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3