aboutsummaryrefslogtreecommitdiff
path: root/lib/842/842_compress.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /lib/842/842_compress.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next (grafted)
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. 
- Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). 
- Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. 
- Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) 
- ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload 
depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'lib/842/842_compress.c')
-rw-r--r--lib/842/842_compress.c630
1 files changed, 630 insertions, 0 deletions
diff --git a/lib/842/842_compress.c b/lib/842/842_compress.c
new file mode 100644
index 000000000..c02baa416
--- /dev/null
+++ b/lib/842/842_compress.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * 842 Software Compression
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * See 842.h for details of the 842 compressed format.
+ */
+
+/* prefix the pr_*() messages emitted from this file with the module name */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/* NOTE(review): not referenced in this file; presumably used by 842_debugfs.h - confirm */
+#define MODULE_NAME "842_compress"
+
+#include <linux/hashtable.h>
+
+#include "842.h"
+#include "842_debugfs.h"
+
+/*
+ * Size arguments passed to DECLARE_HASHTABLE() for the 8-, 4- and 2-byte
+ * lookup tables in struct sw842_param.
+ */
+#define SW842_HASHTABLE8_BITS (10)
+#define SW842_HASHTABLE4_BITS (11)
+#define SW842_HASHTABLE2_BITS (10)
+
+/* By default, we allow compressing input buffers of any length, but we must
+ * use the non-standard "short data" template so the decompressor can correctly
+ * reproduce the uncompressed data buffer at the right length. However the
+ * hardware 842 compressor will not recognize the "short data" template, and
+ * will fail to decompress any compressed buffer containing it (I have no idea
+ * why anyone would want to use software to compress and hardware to decompress
+ * but that's beside the point). The "strict" module parameter forces the
+ * compression function to simply reject any input buffer that isn't a multiple
+ * of 8 bytes long, instead of using the "short data" template, so that all
+ * compressed buffers produced by this function will be decompressible by the
+ * 842 hardware decompressor. Unless you have a specific need for that, leave
+ * this disabled so that any length buffer can be compressed.
+ */
+static bool sw842_strict;
+module_param_named(strict, sw842_strict, bool, 0644);
+
+/*
+ * Template table. Each row holds four ops (t[0..3]) followed by the template
+ * code (t[4]) that add_template() writes to the output ahead of the op
+ * parameters. process_next() tries the rows in order and falls back to the
+ * last row, so the row order is the selection priority. The trailing comment
+ * on each row is the size in bits of that template's parameters.
+ */
+static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
+ { I8, N0, N0, N0, 0x19 }, /* 8 */
+ { I4, I4, N0, N0, 0x18 }, /* 18 */
+ { I4, I2, I2, N0, 0x17 }, /* 25 */
+ { I2, I2, I4, N0, 0x13 }, /* 25 */
+ { I2, I2, I2, I2, 0x12 }, /* 32 */
+ { I4, I2, D2, N0, 0x16 }, /* 33 */
+ { I4, D2, I2, N0, 0x15 }, /* 33 */
+ { I2, D2, I4, N0, 0x0e }, /* 33 */
+ { D2, I2, I4, N0, 0x09 }, /* 33 */
+ { I2, I2, I2, D2, 0x11 }, /* 40 */
+ { I2, I2, D2, I2, 0x10 }, /* 40 */
+ { I2, D2, I2, I2, 0x0d }, /* 40 */
+ { D2, I2, I2, I2, 0x08 }, /* 40 */
+ { I4, D4, N0, N0, 0x14 }, /* 41 */
+ { D4, I4, N0, N0, 0x04 }, /* 41 */
+ { I2, I2, D4, N0, 0x0f }, /* 48 */
+ { I2, D2, I2, D2, 0x0c }, /* 48 */
+ { I2, D4, I2, N0, 0x0b }, /* 48 */
+ { D2, I2, I2, D2, 0x07 }, /* 48 */
+ { D2, I2, D2, I2, 0x06 }, /* 48 */
+ { D4, I2, I2, N0, 0x03 }, /* 48 */
+ { I2, D2, D4, N0, 0x0a }, /* 56 */
+ { D2, I2, D4, N0, 0x05 }, /* 56 */
+ { D4, I2, D2, N0, 0x02 }, /* 56 */
+ { D4, D2, I2, N0, 0x01 }, /* 56 */
+ { D8, N0, N0, N0, 0x00 }, /* 64 */
+};
+
+/*
+ * Hashtable entry for an 8-byte chunk. @data is the chunk value the node is
+ * currently hashed under; @index is the node's position in
+ * sw842_param.node8[], assigned once by init_hashtable_nodes().
+ */
+struct sw842_hlist_node8 {
+ struct hlist_node node;
+ u64 data;
+ u8 index;
+};
+
+/*
+ * Hashtable entry for a 4-byte chunk. @data is the chunk value the node is
+ * currently hashed under; @index is the node's position in
+ * sw842_param.node4[], assigned once by init_hashtable_nodes().
+ */
+struct sw842_hlist_node4 {
+ struct hlist_node node;
+ u32 data;
+ u16 index;
+};
+
+/*
+ * Hashtable entry for a 2-byte chunk. @data is the chunk value the node is
+ * currently hashed under; @index is the node's position in
+ * sw842_param.node2[], assigned once by init_hashtable_nodes().
+ */
+struct sw842_hlist_node2 {
+ struct hlist_node node;
+ u16 data;
+ u8 index;
+};
+
+/*
+ * Sentinel values for sw842_param.indexN[]; real indexes are >= 0.
+ * NOT_FOUND: find_index() ran and found no match.
+ * NOT_CHECKED: no lookup has been done yet for the current block.
+ */
+#define INDEX_NOT_FOUND (-1)
+#define INDEX_NOT_CHECKED (-2)
+
+/*
+ * Working state for one sw842_compress() call; lives in the caller-supplied
+ * wmem buffer.
+ */
+struct sw842_param {
+ /* current read position in the input; advanced as blocks are consumed */
+ u8 *in;
+ /* start of the input buffer, used to compute the current offset */
+ u8 *instart;
+ /* input bytes not yet consumed */
+ u64 ilen;
+ /* output byte currently being filled */
+ u8 *out;
+ /* output bytes still available starting at @out */
+ u64 olen;
+ /* number of bits already written into *out */
+ u8 bit;
+ /* current 8-byte block viewed as 1x8, 2x4 and 4x2 byte values (see get_next_data()) */
+ u64 data8[1];
+ u32 data4[2];
+ u16 data2[4];
+ /* hashtable lookup result for each dataN[] slot, or INDEX_NOT_FOUND / INDEX_NOT_CHECKED */
+ int index8[1];
+ int index4[2];
+ int index2[4];
+ /* lookup tables mapping a chunk value to the nodeN[] entry holding it */
+ DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
+ DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
+ DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
+ /* node storage; update_hashtables() reuses slots modulo the array size */
+ struct sw842_hlist_node8 node8[1 << I8_BITS];
+ struct sw842_hlist_node4 node4[1 << I4_BITS];
+ struct sw842_hlist_node2 node2[1 << I2_BITS];
+};
+
+/*
+ * Read a big-endian b-bit value located o bytes past (p)->in and convert it
+ * to CPU byte order; the read goes through get_unaligned().
+ */
+#define get_input_data(p, o, b) \
+ be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
+
+/*
+ * Reset the b-byte lookup state of p: initialise htable##b and, for every
+ * entry of node##b[], set index to its array position, data to 0 and
+ * initialise its hlist node.
+ */
+#define init_hashtable_nodes(p, b) do { \
+ int _i; \
+ hash_init((p)->htable##b); \
+ for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \
+ (p)->node##b[_i].index = _i; \
+ (p)->node##b[_i].data = 0; \
+ INIT_HLIST_NODE(&(p)->node##b[_i].node); \
+ } \
+} while (0)
+
+/*
+ * Look up (p)->data##b[n] in htable##b. The index of the first node whose
+ * data matches is stored in (p)->index##b[n]; if no node matches,
+ * INDEX_NOT_FOUND is stored instead. Evaluates to true when a match was
+ * found.
+ *
+ * p is parenthesized at every use, like in the neighbouring macros, so any
+ * pointer expression is safe to pass. p and n are evaluated several times
+ * and must be free of side effects.
+ */
+#define find_index(p, b, n) ({ \
+	struct sw842_hlist_node##b *_n; \
+	(p)->index##b[n] = INDEX_NOT_FOUND; \
+	hash_for_each_possible((p)->htable##b, _n, node, (p)->data##b[n]) { \
+		if ((p)->data##b[n] == _n->data) { \
+			(p)->index##b[n] = _n->index; \
+			break; \
+		} \
+	} \
+	(p)->index##b[n] >= 0; \
+})
+
+/*
+ * True if (p)->data##b[n] has a hashtable match. The lookup is done through
+ * find_index() only while index##b[n] is still INDEX_NOT_CHECKED; otherwise
+ * the stored result is reused.
+ */
+#define check_index(p, b, n) \
+ ((p)->index##b[n] == INDEX_NOT_CHECKED \
+ ? find_index(p, b, n) \
+ : (p)->index##b[n] >= 0)
+
+/*
+ * Re-point node##b[(i)+(d)] at the current block: remove the node from the
+ * hashtable, store (p)->data##b[d] in it, and add it back hashed under the
+ * new data. i is the base node slot and d the chunk number within the
+ * current 8-byte block (see update_hashtables()).
+ */
+#define replace_hash(p, b, i, d) do { \
+ struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \
+ hash_del(&_n->node); \
+ _n->data = (p)->data##b[d]; \
+ pr_debug("add hash index%x %x pos %x data %lx\n", b, \
+ (unsigned int)_n->index, \
+ (unsigned int)((p)->in - (p)->instart), \
+ (unsigned long)_n->data); \
+ hash_add((p)->htable##b, &_n->node, _n->data); \
+} while (0)
+
+/*
+ * bmask[b] selects the top b bits of a byte; add_bits() uses it to keep the
+ * bits already written into the current output byte.
+ */
+static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
+
+/* forward declaration: add_bits() and __split_add_bits() call each other */
+static int add_bits(struct sw842_param *p, u64 d, u8 n);
+
+/*
+ * Write the n-bit value d as two add_bits() calls: first the upper n - s
+ * bits, then the lower s bits.
+ *
+ * Returns 0 on success, -EINVAL unless s < n, or the error from add_bits().
+ */
+static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
+{
+	int err;
+
+	/* the low part must be strictly smaller than the whole value */
+	if (s >= n)
+		return -EINVAL;
+
+	err = add_bits(p, d >> s, n - s);
+	if (!err)
+		err = add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
+
+	return err;
+}
+
+/*
+ * Append the n-bit value d to the output at p->out / p->bit, most
+ * significant bit first, then advance p->out, p->olen and p->bit.
+ * d is not masked here, so it must already fit in n bits.
+ *
+ * Returns 0 on success, -EINVAL if n > 64, -ENOSPC if the value does not fit
+ * in the remaining p->olen bytes, or the result of __split_add_bits() when
+ * the write has to be split.
+ */
+static int add_bits(struct sw842_param *p, u64 d, u8 n)
+{
+ /* b: bits already used in *out; bits: bits used after this write;
+ * s: unused low bits left in the last byte this write touches
+ */
+ int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
+ u64 o;
+ u8 *out = p->out;
+
+ pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
+
+ if (n > 64)
+ return -EINVAL;
+
+ /* split this up if writing to > 8 bytes (e.g. n == 64 && p->bit > 0),
+ * or if we're at the end of the output buffer and would write past end
+ * (the stores below use 4 bytes for 17-24 bits and 8 bytes for 33-56 bits)
+ */
+ if (bits > 64)
+ return __split_add_bits(p, d, n, 32);
+ else if (p->olen < 8 && bits > 32 && bits <= 56)
+ return __split_add_bits(p, d, n, 16);
+ else if (p->olen < 4 && bits > 16 && bits <= 24)
+ return __split_add_bits(p, d, n, 8);
+
+ if (DIV_ROUND_UP(bits, 8) > p->olen)
+ return -ENOSPC;
+
+ /* keep the b bits already written to the current byte */
+ o = *out & bmask[b];
+ /* left-align d so it ends on the last byte boundary touched */
+ d <<= s;
+
+ /* merge the kept bits with d and store big-endian, using the smallest
+ * 1/2/4/8-byte store that covers 'bits'
+ */
+ if (bits <= 8)
+ *out = o | d;
+ else if (bits <= 16)
+ put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
+ else if (bits <= 24)
+ put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
+ else if (bits <= 32)
+ put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
+ else if (bits <= 40)
+ put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
+ else if (bits <= 48)
+ put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
+ else if (bits <= 56)
+ put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
+ else
+ put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
+
+ p->bit += n;
+
+ /* move completed bytes out of the bit counter */
+ if (p->bit > 7) {
+ p->out += p->bit / 8;
+ p->olen -= p->bit / 8;
+ p->bit %= 8;
+ }
+
+ return 0;
+}
+
+/*
+ * Emit template comp_ops[c] for the current 8-byte block: first the template
+ * code (t[4]), then for each of the four ops either the hashtable index
+ * (index op) or the literal data (data op) for the bytes the op covers.
+ * b tracks how many of the 8 block bytes the ops seen so far have covered.
+ *
+ * Returns 0 on success, -EINVAL if c is out of range or the template row is
+ * malformed, or the error returned by add_bits().
+ */
+static int add_template(struct sw842_param *p, u8 c)
+{
+	int ret, i, b = 0;
+	u8 *t;
+	bool inv = false;
+
+	if (c >= OPS_MAX)
+		return -EINVAL;
+
+	/* only index the table once c is known to be in range */
+	t = comp_ops[c];
+
+	pr_debug("template %x\n", t[4]);
+
+	ret = add_bits(p, t[4], OP_BITS);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 4; i++) {
+		pr_debug("op %x\n", t[i]);
+
+		switch (t[i] & OP_AMOUNT) {
+		case OP_AMOUNT_8:
+			if (b)
+				inv = true;
+			else if (t[i] & OP_ACTION_INDEX)
+				ret = add_bits(p, p->index8[0], I8_BITS);
+			else if (t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, p->data8[0], 64);
+			else
+				inv = true;
+			break;
+		case OP_AMOUNT_4:
+			/* 4 data bytes at offset 2 straddle data4[0] and
+			 * data4[1], so read them from the input directly
+			 */
+			if (b == 2 && t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, get_input_data(p, 2, 32), 32);
+			else if (b != 0 && b != 4)
+				inv = true;
+			else if (t[i] & OP_ACTION_INDEX)
+				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
+			else if (t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, p->data4[b >> 2], 32);
+			else
+				inv = true;
+			break;
+		case OP_AMOUNT_2:
+			/* a bad offset must not emit bits or index the
+			 * index2[]/data2[] arrays, same as the cases above
+			 */
+			if (b != 0 && b != 2 && b != 4 && b != 6)
+				inv = true;
+			else if (t[i] & OP_ACTION_INDEX)
+				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
+			else if (t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, p->data2[b >> 1], 16);
+			else
+				inv = true;
+			break;
+		case OP_AMOUNT_0:
+			/* padding ops are only valid once all 8 bytes are covered */
+			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
+			break;
+		default:
+			inv = true;
+			break;
+		}
+
+		if (ret)
+			return ret;
+
+		if (inv) {
+			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
+			       c, i, t[0], t[1], t[2], t[3]);
+			return -EINVAL;
+		}
+
+		b += t[i] & OP_AMOUNT;
+	}
+
+	if (b != 8) {
+		pr_err("Invalid template %x len %x : %x %x %x %x\n",
+		       c, b, t[0], t[1], t[2], t[3]);
+		return -EINVAL;
+	}
+
+	if (sw842_template_counts)
+		atomic_inc(&template_count[t[4]]);
+
+	return 0;
+}
+
+/*
+ * Emit a repeat template for r repetitions. The count is written 0-based
+ * (r - 1) in REPEAT_BITS bits.
+ *
+ * Returns 0 on success, -EINVAL if r is 0 or r - 1 exceeds REPEAT_BITS_MAX,
+ * or the error returned by add_bits().
+ */
+static int add_repeat_template(struct sw842_param *p, u8 r)
+{
+	int err;
+
+	/* a 0-based count cannot express "no repeats" */
+	if (r == 0)
+		return -EINVAL;
+
+	r--;
+	if (r > REPEAT_BITS_MAX)
+		return -EINVAL;
+
+	err = add_bits(p, OP_REPEAT, OP_BITS);
+	if (!err)
+		err = add_bits(p, r, REPEAT_BITS);
+	if (err)
+		return err;
+
+	if (sw842_template_counts)
+		atomic_inc(&template_repeat_count);
+
+	return 0;
+}
+
+/*
+ * Emit a short-data template: the op code, the byte count b in
+ * SHORT_DATA_BITS bits, then the b literal bytes starting at p->in.
+ *
+ * Returns 0 on success, -EINVAL if b is 0 or larger than
+ * SHORT_DATA_BITS_MAX, or the error returned by add_bits().
+ */
+static int add_short_data_template(struct sw842_param *p, u8 b)
+{
+	int err, pos = 0;
+
+	if (b == 0 || b > SHORT_DATA_BITS_MAX)
+		return -EINVAL;
+
+	err = add_bits(p, OP_SHORT_DATA, OP_BITS);
+	if (err)
+		return err;
+
+	err = add_bits(p, b, SHORT_DATA_BITS);
+	if (err)
+		return err;
+
+	/* copy the literal bytes one at a time */
+	while (pos < b) {
+		err = add_bits(p, p->in[pos], 8);
+		if (err)
+			return err;
+		pos++;
+	}
+
+	if (sw842_template_counts)
+		atomic_inc(&template_short_data_count);
+
+	return 0;
+}
+
+/*
+ * Emit the zeros template (op code only, no parameters).
+ * Returns 0 on success or the error returned by add_bits().
+ */
+static int add_zeros_template(struct sw842_param *p)
+{
+	int err;
+
+	err = add_bits(p, OP_ZEROS, OP_BITS);
+
+	/* only count templates that were actually written */
+	if (!err && sw842_template_counts)
+		atomic_inc(&template_zeros_count);
+
+	return err;
+}
+
+/*
+ * Emit the end-of-stream template (op code only, no parameters).
+ * Returns 0 on success or the error returned by add_bits().
+ */
+static int add_end_template(struct sw842_param *p)
+{
+	int err;
+
+	err = add_bits(p, OP_END, OP_BITS);
+
+	/* only count templates that were actually written */
+	if (!err && sw842_template_counts)
+		atomic_inc(&template_end_count);
+
+	return err;
+}
+
+/*
+ * Return true if template comp_ops[c] can encode the current block, i.e.
+ * every index op in the row has a hashtable match for the chunk it covers.
+ * Data ops always fit and are skipped. Lookup results are stored in
+ * p->indexN[] by check_index(), so each chunk is searched at most once per
+ * block.
+ *
+ * Returns false if c is out of range.
+ */
+static bool check_template(struct sw842_param *p, u8 c)
+{
+	u8 *t;
+	int i, match, b = 0;
+
+	if (c >= OPS_MAX)
+		return false;
+
+	/* only index the table once c is known to be in range */
+	t = comp_ops[c];
+
+	for (i = 0; i < 4; i++) {
+		if (t[i] & OP_ACTION_INDEX) {
+			/* b is the byte offset of this op within the block */
+			if (t[i] & OP_AMOUNT_2)
+				match = check_index(p, 2, b >> 1);
+			else if (t[i] & OP_AMOUNT_4)
+				match = check_index(p, 4, b >> 2);
+			else if (t[i] & OP_AMOUNT_8)
+				match = check_index(p, 8, 0);
+			else
+				return false;
+			if (!match)
+				return false;
+		}
+
+		b += t[i] & OP_AMOUNT;
+	}
+
+	return true;
+}
+
+/*
+ * Load the 8 bytes at p->in into p->data8[], p->data4[] and p->data2[],
+ * i.e. the same block viewed as one 64-bit, two 32-bit and four 16-bit
+ * big-endian values.
+ */
+static void get_next_data(struct sw842_param *p)
+{
+	int k;
+
+	p->data8[0] = get_input_data(p, 0, 64);
+
+	for (k = 0; k < 2; k++)
+		p->data4[k] = get_input_data(p, 4 * k, 32);
+
+	for (k = 0; k < 4; k++)
+		p->data2[k] = get_input_data(p, 2 * k, 16);
+}
+
+/* Record the current 8-byte block in all three hashtables.
+ * Call this only after the template for the block has been found/added, and
+ * with the p->dataN fields already holding the block (see get_next_data()).
+ * The node slots are derived from the block's offset in the input, wrapping
+ * at the size of each node array.
+ */
+static void update_hashtables(struct sw842_param *p)
+{
+	u64 off = p->in - p->instart;
+	u64 slot8 = (off >> 3) % (1 << I8_BITS);
+	u64 slot4 = (off >> 2) % (1 << I4_BITS);
+	u64 slot2 = (off >> 1) % (1 << I2_BITS);
+	int d;
+
+	replace_hash(p, 8, slot8, 0);
+
+	for (d = 0; d < 2; d++)
+		replace_hash(p, 4, slot4, d);
+
+	for (d = 0; d < 4; d++)
+		replace_hash(p, 2, slot2, d);
+}
+
+/* Pick the template for the current 8-byte block and write it out.
+ * The p->dataN fields must already hold the block.
+ * Returns 0 on success or the error from add_template().
+ */
+static int process_next(struct sw842_param *p)
+{
+	int i;
+
+	/* forget the lookups done for the previous block */
+	p->index8[0] = INDEX_NOT_CHECKED;
+	for (i = 0; i < 2; i++)
+		p->index4[i] = INDEX_NOT_CHECKED;
+	for (i = 0; i < 4; i++)
+		p->index2[i] = INDEX_NOT_CHECKED;
+
+	/* take the first template that fits; if none of the first
+	 * OPS_MAX - 1 do, i ends on the last op, which is our fallback
+	 */
+	i = 0;
+	while (i < OPS_MAX - 1 && !check_template(p, i))
+		i++;
+
+	return add_template(p, i);
+}
+
+/**
+ * sw842_compress
+ * @in: uncompressed input buffer
+ * @ilen: length of @in in bytes
+ * @out: buffer that receives the compressed data
+ * @olen: on entry, the size of @out in bytes; on return, the number of
+ *        bytes written
+ * @wmem: working memory, used as a struct sw842_param; at most
+ *        SW842_MEM_COMPRESS bytes are needed
+ *
+ * Compress the uncompressed buffer of length @ilen at @in to the output buffer
+ * @out, using no more than @olen bytes, using the 842 compression format.
+ * The output is the template stream, an end template, a 32-bit CRC of the
+ * input, and zero padding up to a multiple of 8 bytes.
+ *
+ * Returns: 0 on success, error on failure. The @olen parameter
+ * will contain the number of output bytes written on success, or
+ * 0 on error.
+ */
+int sw842_compress(const u8 *in, unsigned int ilen,
+		   u8 *out, unsigned int *olen, void *wmem)
+{
+	struct sw842_param *p = (struct sw842_param *)wmem;
+	int ret;
+	u64 last, next, pad, total;
+	u8 repeat_count = 0;
+	u32 crc;
+
+	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
+
+	init_hashtable_nodes(p, 8);
+	init_hashtable_nodes(p, 4);
+	init_hashtable_nodes(p, 2);
+
+	p->in = (u8 *)in;
+	p->instart = p->in;
+	p->ilen = ilen;
+	p->out = out;
+	p->olen = *olen;
+	p->bit = 0;
+
+	total = p->olen;
+
+	/* report 0 bytes written on every error path below */
+	*olen = 0;
+
+	/* if using strict mode, we can only compress a multiple of 8 */
+	if (sw842_strict && (ilen % 8)) {
+		pr_err("Using strict mode, can't compress len %u\n", ilen);
+		return -EINVAL;
+	}
+
+	/* let's compress at least 8 bytes, mkay? */
+	if (unlikely(ilen < 8))
+		goto skip_comp;
+
+	/* make initial 'last' different so we don't match the first time */
+	last = ~get_unaligned((u64 *)p->in);
+
+	while (p->ilen > 7) {
+		next = get_unaligned((u64 *)p->in);
+
+		/* must get the next data, as we need to update the hashtable
+		 * entries with the new data every time
+		 */
+		get_next_data(p);
+
+		/* we don't care about endianness in last or next;
+		 * we're just comparing 8 bytes to another 8 bytes,
+		 * they're both the same endianness
+		 */
+		if (next == last) {
+			/* repeat count bits are 0-based, so we stop at +1 */
+			if (++repeat_count <= REPEAT_BITS_MAX)
+				goto repeat;
+		}
+		if (repeat_count) {
+			ret = add_repeat_template(p, repeat_count);
+			/* don't let a failed write be skipped or overwritten */
+			if (ret)
+				return ret;
+			repeat_count = 0;
+			if (next == last) /* reached max repeat bits */
+				goto repeat;
+		}
+
+		if (next == 0)
+			ret = add_zeros_template(p);
+		else
+			ret = process_next(p);
+
+		if (ret)
+			return ret;
+
+repeat:
+		last = next;
+		update_hashtables(p);
+		p->in += 8;
+		p->ilen -= 8;
+	}
+
+	/* flush a repeat run that reached the end of the full blocks */
+	if (repeat_count) {
+		ret = add_repeat_template(p, repeat_count);
+		if (ret)
+			return ret;
+	}
+
+skip_comp:
+	/* fewer than 8 bytes left: emit them with the short data template */
+	if (p->ilen > 0) {
+		ret = add_short_data_template(p, p->ilen);
+		if (ret)
+			return ret;
+
+		p->in += p->ilen;
+		p->ilen = 0;
+	}
+
+	ret = add_end_template(p);
+	if (ret)
+		return ret;
+
+	/*
+	 * crc(0:31) is appended to target data starting with the next
+	 * bit after End of stream template.
+	 * nx842 calculates CRC for data in big-endian format. So doing
+	 * same here so that sw842 decompression can be used for both
+	 * compressed data.
+	 */
+	crc = crc32_be(0, in, ilen);
+	ret = add_bits(p, crc, CRC_BITS);
+	if (ret)
+		return ret;
+
+	/* count the partially filled last byte as used */
+	if (p->bit) {
+		p->out++;
+		p->olen--;
+		p->bit = 0;
+	}
+
+	/* pad compressed length to multiple of 8 */
+	pad = (8 - ((total - p->olen) % 8)) % 8;
+	if (pad) {
+		if (pad > p->olen) /* we were so close! */
+			return -ENOSPC;
+		memset(p->out, 0, pad);
+		p->out += pad;
+		p->olen -= pad;
+	}
+
+	if (unlikely((total - p->olen) > UINT_MAX))
+		return -ENOSPC;
+
+	*olen = total - p->olen;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sw842_compress);
+
+/* Module init: create the debugfs entries when template counting is enabled. */
+static int __init sw842_init(void)
+{
+	if (!sw842_template_counts)
+		return 0;
+
+	sw842_debugfs_create();
+
+	return 0;
+}
+module_init(sw842_init);
+
+/* Module exit: remove the debugfs entries when template counting is enabled. */
+static void __exit sw842_exit(void)
+{
+	if (!sw842_template_counts)
+		return;
+
+	sw842_debugfs_remove();
+}
+module_exit(sw842_exit);
+
+/* module metadata */
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Software 842 Compressor");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");