aboutsummaryrefslogtreecommitdiff
path: root/fs/ioctl.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /fs/ioctl.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to 'fs/ioctl.c')
-rw-r--r--fs/ioctl.c979
1 files changed, 979 insertions, 0 deletions
diff --git a/fs/ioctl.c b/fs/ioctl.c
new file mode 100644
index 000000000..5b2481cd4
--- /dev/null
+++ b/fs/ioctl.c
@@ -0,0 +1,979 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/fs/ioctl.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/capability.h>
+#include <linux/compat.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/security.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
+#include <linux/falloc.h>
+#include <linux/sched/signal.h>
+#include <linux/fiemap.h>
+#include <linux/mount.h>
+#include <linux/fscrypt.h>
+#include <linux/fileattr.h>
+
+#include "internal.h"
+
+#include <asm/ioctls.h>
+
+/* So that the fiemap access checks can't overflow on 32 bit machines. */
+#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
+
+/**
+ * vfs_ioctl - call filesystem specific ioctl methods
+ * @filp: open file to invoke ioctl method on
+ * @cmd: ioctl command to execute
+ * @arg: command-specific argument for ioctl
+ *
+ * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise
+ * returns -ENOTTY.
+ *
+ * Returns 0 on success, -errno on error.
+ */
+long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ int error = -ENOTTY;
+
+ if (!filp->f_op->unlocked_ioctl)
+ goto out;
+
+ error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
+ if (error == -ENOIOCTLCMD)
+ error = -ENOTTY;
+ out:
+ return error;
+}
+EXPORT_SYMBOL(vfs_ioctl);
+
+static int ioctl_fibmap(struct file *filp, int __user *p)
+{
+ struct inode *inode = file_inode(filp);
+ struct super_block *sb = inode->i_sb;
+ int error, ur_block;
+ sector_t block;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ error = get_user(ur_block, p);
+ if (error)
+ return error;
+
+ if (ur_block < 0)
+ return -EINVAL;
+
+ block = ur_block;
+ error = bmap(inode, &block);
+
+ if (block > INT_MAX) {
+ error = -ERANGE;
+ pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n",
+ current->comm, task_pid_nr(current),
+ sb->s_id, filp);
+ }
+
+ if (error)
+ ur_block = 0;
+ else
+ ur_block = block;
+
+ if (put_user(ur_block, p))
+ error = -EFAULT;
+
+ return error;
+}
+
+/**
+ * fiemap_fill_next_extent - Fiemap helper function
+ * @fieinfo: Fiemap context passed into ->fiemap
+ * @logical: Extent logical start offset, in bytes
+ * @phys: Extent physical start offset, in bytes
+ * @len: Extent length, in bytes
+ * @flags: FIEMAP_EXTENT flags that describe this extent
+ *
+ * Called from file system ->fiemap callback. Will populate extent
+ * info as passed in via arguments and copy to user memory. On
+ * success, extent count on fieinfo is incremented.
+ *
+ * Returns 0 on success, -errno on error, 1 if this was the last
+ * extent that will fit in user array.
+ */
+#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
+#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
+#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
+int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
+ u64 phys, u64 len, u32 flags)
+{
+ struct fiemap_extent extent;
+ struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
+
+ /* only count the extents */
+ if (fieinfo->fi_extents_max == 0) {
+ fieinfo->fi_extents_mapped++;
+ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+ }
+
+ if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
+ return 1;
+
+ if (flags & SET_UNKNOWN_FLAGS)
+ flags |= FIEMAP_EXTENT_UNKNOWN;
+ if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
+ flags |= FIEMAP_EXTENT_ENCODED;
+ if (flags & SET_NOT_ALIGNED_FLAGS)
+ flags |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+ memset(&extent, 0, sizeof(extent));
+ extent.fe_logical = logical;
+ extent.fe_physical = phys;
+ extent.fe_length = len;
+ extent.fe_flags = flags;
+
+ dest += fieinfo->fi_extents_mapped;
+ if (copy_to_user(dest, &extent, sizeof(extent)))
+ return -EFAULT;
+
+ fieinfo->fi_extents_mapped++;
+ if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
+ return 1;
+ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+}
+EXPORT_SYMBOL(fiemap_fill_next_extent);
+
+/**
+ * fiemap_prep - check validity of requested flags for fiemap
+ * @inode: Inode to operate on
+ * @fieinfo: Fiemap context passed into ->fiemap
+ * @start: Start of the mapped range
+ * @len: Length of the mapped range, can be truncated by this function.
+ * @supported_flags: Set of fiemap flags that the file system understands
+ *
+ * This function must be called from each ->fiemap instance to validate the
+ * fiemap request against the file system parameters.
+ *
+ * Returns 0 on success, or a negative error on failure.
+ */
+int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 *len, u32 supported_flags)
+{
+ u64 maxbytes = inode->i_sb->s_maxbytes;
+ u32 incompat_flags;
+ int ret = 0;
+
+ if (*len == 0)
+ return -EINVAL;
+ if (start >= maxbytes)
+ return -EFBIG;
+
+ /*
+ * Shrink request scope to what the fs can actually handle.
+ */
+ if (*len > maxbytes || (maxbytes - *len) < start)
+ *len = maxbytes - start;
+
+ supported_flags |= FIEMAP_FLAG_SYNC;
+ supported_flags &= FIEMAP_FLAGS_COMPAT;
+ incompat_flags = fieinfo->fi_flags & ~supported_flags;
+ if (incompat_flags) {
+ fieinfo->fi_flags = incompat_flags;
+ return -EBADR;
+ }
+
+ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
+ ret = filemap_write_and_wait(inode->i_mapping);
+ return ret;
+}
+EXPORT_SYMBOL(fiemap_prep);
+
+static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap)
+{
+ struct fiemap fiemap;
+ struct fiemap_extent_info fieinfo = { 0, };
+ struct inode *inode = file_inode(filp);
+ int error;
+
+ if (!inode->i_op->fiemap)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
+ return -EFAULT;
+
+ if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
+ return -EINVAL;
+
+ fieinfo.fi_flags = fiemap.fm_flags;
+ fieinfo.fi_extents_max = fiemap.fm_extent_count;
+ fieinfo.fi_extents_start = ufiemap->fm_extents;
+
+ error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start,
+ fiemap.fm_length);
+
+ fiemap.fm_flags = fieinfo.fi_flags;
+ fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
+ if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
+ error = -EFAULT;
+
+ return error;
+}
+
+static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
+ u64 off, u64 olen, u64 destoff)
+{
+ struct fd src_file = fdget(srcfd);
+ loff_t cloned;
+ int ret;
+
+ if (!src_file.file)
+ return -EBADF;
+ cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
+ olen, 0);
+ if (cloned < 0)
+ ret = cloned;
+ else if (olen && cloned != olen)
+ ret = -EINVAL;
+ else
+ ret = 0;
+ fdput(src_file);
+ return ret;
+}
+
+static long ioctl_file_clone_range(struct file *file,
+ struct file_clone_range __user *argp)
+{
+ struct file_clone_range args;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+ return ioctl_file_clone(file, args.src_fd, args.src_offset,
+ args.src_length, args.dest_offset);
+}
+
+/*
+ * This provides compatibility with legacy XFS pre-allocation ioctls
+ * which predate the fallocate syscall.
+ *
+ * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
+ * are used here, rest are ignored.
+ */
+static int ioctl_preallocate(struct file *filp, int mode, void __user *argp)
+{
+ struct inode *inode = file_inode(filp);
+ struct space_resv sr;
+
+ if (copy_from_user(&sr, argp, sizeof(sr)))
+ return -EFAULT;
+
+ switch (sr.l_whence) {
+ case SEEK_SET:
+ break;
+ case SEEK_CUR:
+ sr.l_start += filp->f_pos;
+ break;
+ case SEEK_END:
+ sr.l_start += i_size_read(inode);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return vfs_fallocate(filp, mode | FALLOC_FL_KEEP_SIZE, sr.l_start,
+ sr.l_len);
+}
+
+/* on ia32 l_start is on a 32-bit boundary */
+#if defined CONFIG_COMPAT && defined(CONFIG_X86_64)
+/* just account for different alignment */
+static int compat_ioctl_preallocate(struct file *file, int mode,
+ struct space_resv_32 __user *argp)
+{
+ struct inode *inode = file_inode(file);
+ struct space_resv_32 sr;
+
+ if (copy_from_user(&sr, argp, sizeof(sr)))
+ return -EFAULT;
+
+ switch (sr.l_whence) {
+ case SEEK_SET:
+ break;
+ case SEEK_CUR:
+ sr.l_start += file->f_pos;
+ break;
+ case SEEK_END:
+ sr.l_start += i_size_read(inode);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return vfs_fallocate(file, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+}
+#endif
+
+static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p)
+{
+ switch (cmd) {
+ case FIBMAP:
+ return ioctl_fibmap(filp, p);
+ case FS_IOC_RESVSP:
+ case FS_IOC_RESVSP64:
+ return ioctl_preallocate(filp, 0, p);
+ case FS_IOC_UNRESVSP:
+ case FS_IOC_UNRESVSP64:
+ return ioctl_preallocate(filp, FALLOC_FL_PUNCH_HOLE, p);
+ case FS_IOC_ZERO_RANGE:
+ return ioctl_preallocate(filp, FALLOC_FL_ZERO_RANGE, p);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+static int ioctl_fionbio(struct file *filp, int __user *argp)
+{
+ unsigned int flag;
+ int on, error;
+
+ error = get_user(on, argp);
+ if (error)
+ return error;
+ flag = O_NONBLOCK;
+#ifdef __sparc__
+ /* SunOS compatibility item. */
+ if (O_NONBLOCK != O_NDELAY)
+ flag |= O_NDELAY;
+#endif
+ spin_lock(&filp->f_lock);
+ if (on)
+ filp->f_flags |= flag;
+ else
+ filp->f_flags &= ~flag;
+ spin_unlock(&filp->f_lock);
+ return error;
+}
+
+static int ioctl_fioasync(unsigned int fd, struct file *filp,
+ int __user *argp)
+{
+ unsigned int flag;
+ int on, error;
+
+ error = get_user(on, argp);
+ if (error)
+ return error;
+ flag = on ? FASYNC : 0;
+
+ /* Did FASYNC state change ? */
+ if ((flag ^ filp->f_flags) & FASYNC) {
+ if (filp->f_op->fasync)
+ /* fasync() adjusts filp->f_flags */
+ error = filp->f_op->fasync(fd, filp, on);
+ else
+ error = -ENOTTY;
+ }
+ return error < 0 ? error : 0;
+}
+
+static int ioctl_fsfreeze(struct file *filp)
+{
+ struct super_block *sb = file_inode(filp)->i_sb;
+
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* If filesystem doesn't support freeze feature, return. */
+ if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL)
+ return -EOPNOTSUPP;
+
+ /* Freeze */
+ if (sb->s_op->freeze_super)
+ return sb->s_op->freeze_super(sb);
+ return freeze_super(sb);
+}
+
+static int ioctl_fsthaw(struct file *filp)
+{
+ struct super_block *sb = file_inode(filp)->i_sb;
+
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* Thaw */
+ if (sb->s_op->thaw_super)
+ return sb->s_op->thaw_super(sb);
+ return thaw_super(sb);
+}
+
+static int ioctl_file_dedupe_range(struct file *file,
+ struct file_dedupe_range __user *argp)
+{
+ struct file_dedupe_range *same = NULL;
+ int ret;
+ unsigned long size;
+ u16 count;
+
+ if (get_user(count, &argp->dest_count)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ size = offsetof(struct file_dedupe_range, info[count]);
+ if (size > PAGE_SIZE) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ same = memdup_user(argp, size);
+ if (IS_ERR(same)) {
+ ret = PTR_ERR(same);
+ same = NULL;
+ goto out;
+ }
+
+ same->dest_count = count;
+ ret = vfs_dedupe_file_range(file, same);
+ if (ret)
+ goto out;
+
+ ret = copy_to_user(argp, same, size);
+ if (ret)
+ ret = -EFAULT;
+
+out:
+ kfree(same);
+ return ret;
+}
+
+/**
+ * fileattr_fill_xflags - initialize fileattr with xflags
+ * @fa: fileattr pointer
+ * @xflags: FS_XFLAG_* flags
+ *
+ * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags). All
+ * other fields are zeroed.
+ */
+void fileattr_fill_xflags(struct fileattr *fa, u32 xflags)
+{
+ memset(fa, 0, sizeof(*fa));
+ fa->fsx_valid = true;
+ fa->fsx_xflags = xflags;
+ if (fa->fsx_xflags & FS_XFLAG_IMMUTABLE)
+ fa->flags |= FS_IMMUTABLE_FL;
+ if (fa->fsx_xflags & FS_XFLAG_APPEND)
+ fa->flags |= FS_APPEND_FL;
+ if (fa->fsx_xflags & FS_XFLAG_SYNC)
+ fa->flags |= FS_SYNC_FL;
+ if (fa->fsx_xflags & FS_XFLAG_NOATIME)
+ fa->flags |= FS_NOATIME_FL;
+ if (fa->fsx_xflags & FS_XFLAG_NODUMP)
+ fa->flags |= FS_NODUMP_FL;
+ if (fa->fsx_xflags & FS_XFLAG_DAX)
+ fa->flags |= FS_DAX_FL;
+ if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
+ fa->flags |= FS_PROJINHERIT_FL;
+}
+EXPORT_SYMBOL(fileattr_fill_xflags);
+
+/**
+ * fileattr_fill_flags - initialize fileattr with flags
+ * @fa: fileattr pointer
+ * @flags: FS_*_FL flags
+ *
+ * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
+ * All other fields are zeroed.
+ */
+void fileattr_fill_flags(struct fileattr *fa, u32 flags)
+{
+ memset(fa, 0, sizeof(*fa));
+ fa->flags_valid = true;
+ fa->flags = flags;
+ if (fa->flags & FS_SYNC_FL)
+ fa->fsx_xflags |= FS_XFLAG_SYNC;
+ if (fa->flags & FS_IMMUTABLE_FL)
+ fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+ if (fa->flags & FS_APPEND_FL)
+ fa->fsx_xflags |= FS_XFLAG_APPEND;
+ if (fa->flags & FS_NODUMP_FL)
+ fa->fsx_xflags |= FS_XFLAG_NODUMP;
+ if (fa->flags & FS_NOATIME_FL)
+ fa->fsx_xflags |= FS_XFLAG_NOATIME;
+ if (fa->flags & FS_DAX_FL)
+ fa->fsx_xflags |= FS_XFLAG_DAX;
+ if (fa->flags & FS_PROJINHERIT_FL)
+ fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
+}
+EXPORT_SYMBOL(fileattr_fill_flags);
+
+/**
+ * vfs_fileattr_get - retrieve miscellaneous file attributes
+ * @dentry: the object to retrieve from
+ * @fa: fileattr pointer
+ *
+ * Call i_op->fileattr_get() callback, if exists.
+ *
+ * Return: 0 on success, or a negative error on failure.
+ */
+int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+
+ if (!inode->i_op->fileattr_get)
+ return -ENOIOCTLCMD;
+
+ return inode->i_op->fileattr_get(dentry, fa);
+}
+EXPORT_SYMBOL(vfs_fileattr_get);
+
+/**
+ * copy_fsxattr_to_user - copy fsxattr to userspace.
+ * @fa: fileattr pointer
+ * @ufa: fsxattr user pointer
+ *
+ * Return: 0 on success, or -EFAULT on failure.
+ */
+int copy_fsxattr_to_user(const struct fileattr *fa, struct fsxattr __user *ufa)
+{
+ struct fsxattr xfa;
+
+ memset(&xfa, 0, sizeof(xfa));
+ xfa.fsx_xflags = fa->fsx_xflags;
+ xfa.fsx_extsize = fa->fsx_extsize;
+ xfa.fsx_nextents = fa->fsx_nextents;
+ xfa.fsx_projid = fa->fsx_projid;
+ xfa.fsx_cowextsize = fa->fsx_cowextsize;
+
+ if (copy_to_user(ufa, &xfa, sizeof(xfa)))
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL(copy_fsxattr_to_user);
+
+static int copy_fsxattr_from_user(struct fileattr *fa,
+ struct fsxattr __user *ufa)
+{
+ struct fsxattr xfa;
+
+ if (copy_from_user(&xfa, ufa, sizeof(xfa)))
+ return -EFAULT;
+
+ fileattr_fill_xflags(fa, xfa.fsx_xflags);
+ fa->fsx_extsize = xfa.fsx_extsize;
+ fa->fsx_nextents = xfa.fsx_nextents;
+ fa->fsx_projid = xfa.fsx_projid;
+ fa->fsx_cowextsize = xfa.fsx_cowextsize;
+
+ return 0;
+}
+
+/*
+ * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject
+ * any invalid configurations.
+ *
+ * Note: must be called with inode lock held.
+ */
+static int fileattr_set_prepare(struct inode *inode,
+ const struct fileattr *old_ma,
+ struct fileattr *fa)
+{
+ int err;
+
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ */
+ if ((fa->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ err = fscrypt_prepare_setflags(inode, old_ma->flags, fa->flags);
+ if (err)
+ return err;
+
+ /*
+ * Project Quota ID state is only allowed to change from within the init
+ * namespace. Enforce that restriction only if we are trying to change
+ * the quota ID state. Everything else is allowed in user namespaces.
+ */
+ if (current_user_ns() != &init_user_ns) {
+ if (old_ma->fsx_projid != fa->fsx_projid)
+ return -EINVAL;
+ if ((old_ma->fsx_xflags ^ fa->fsx_xflags) &
+ FS_XFLAG_PROJINHERIT)
+ return -EINVAL;
+ } else {
+ /*
+ * Caller is allowed to change the project ID. If it is being
+ * changed, make sure that the new value is valid.
+ */
+ if (old_ma->fsx_projid != fa->fsx_projid &&
+ !projid_valid(make_kprojid(&init_user_ns, fa->fsx_projid)))
+ return -EINVAL;
+ }
+
+ /* Check extent size hints. */
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
+ !S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
+ !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ /*
+ * It is only valid to set the DAX flag on regular files and
+ * directories on filesystems.
+ */
+ if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
+ !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+ return -EINVAL;
+
+ /* Extent size hints of zero turn off the flags. */
+ if (fa->fsx_extsize == 0)
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
+ if (fa->fsx_cowextsize == 0)
+ fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+
+ return 0;
+}
+
+/**
+ * vfs_fileattr_set - change miscellaneous file attributes
+ * @idmap: idmap of the mount
+ * @dentry: the object to change
+ * @fa: fileattr pointer
+ *
+ * After verifying permissions, call i_op->fileattr_set() callback, if
+ * exists.
+ *
+ * Verifying attributes involves retrieving current attributes with
+ * i_op->fileattr_get(), this also allows initializing attributes that have
+ * not been set by the caller to current values. Inode lock is held
+ * thoughout to prevent racing with another instance.
+ *
+ * Return: 0 on success, or a negative error on failure.
+ */
+int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct fileattr old_ma = {};
+ int err;
+
+ if (!inode->i_op->fileattr_set)
+ return -ENOIOCTLCMD;
+
+ if (!inode_owner_or_capable(idmap, inode))
+ return -EPERM;
+
+ inode_lock(inode);
+ err = vfs_fileattr_get(dentry, &old_ma);
+ if (!err) {
+ /* initialize missing bits from old_ma */
+ if (fa->flags_valid) {
+ fa->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON;
+ fa->fsx_extsize = old_ma.fsx_extsize;
+ fa->fsx_nextents = old_ma.fsx_nextents;
+ fa->fsx_projid = old_ma.fsx_projid;
+ fa->fsx_cowextsize = old_ma.fsx_cowextsize;
+ } else {
+ fa->flags |= old_ma.flags & ~FS_COMMON_FL;
+ }
+ err = fileattr_set_prepare(inode, &old_ma, fa);
+ if (!err)
+ err = inode->i_op->fileattr_set(idmap, dentry, fa);
+ }
+ inode_unlock(inode);
+
+ return err;
+}
+EXPORT_SYMBOL(vfs_fileattr_set);
+
+static int ioctl_getflags(struct file *file, unsigned int __user *argp)
+{
+ struct fileattr fa = { .flags_valid = true }; /* hint only */
+ int err;
+
+ err = vfs_fileattr_get(file->f_path.dentry, &fa);
+ if (!err)
+ err = put_user(fa.flags, argp);
+ return err;
+}
+
+static int ioctl_setflags(struct file *file, unsigned int __user *argp)
+{
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
+ struct dentry *dentry = file->f_path.dentry;
+ struct fileattr fa;
+ unsigned int flags;
+ int err;
+
+ err = get_user(flags, argp);
+ if (!err) {
+ err = mnt_want_write_file(file);
+ if (!err) {
+ fileattr_fill_flags(&fa, flags);
+ err = vfs_fileattr_set(idmap, dentry, &fa);
+ mnt_drop_write_file(file);
+ }
+ }
+ return err;
+}
+
+static int ioctl_fsgetxattr(struct file *file, void __user *argp)
+{
+ struct fileattr fa = { .fsx_valid = true }; /* hint only */
+ int err;
+
+ err = vfs_fileattr_get(file->f_path.dentry, &fa);
+ if (!err)
+ err = copy_fsxattr_to_user(&fa, argp);
+
+ return err;
+}
+
+static int ioctl_fssetxattr(struct file *file, void __user *argp)
+{
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
+ struct dentry *dentry = file->f_path.dentry;
+ struct fileattr fa;
+ int err;
+
+ err = copy_fsxattr_from_user(&fa, argp);
+ if (!err) {
+ err = mnt_want_write_file(file);
+ if (!err) {
+ err = vfs_fileattr_set(idmap, dentry, &fa);
+ mnt_drop_write_file(file);
+ }
+ }
+ return err;
+}
+
+/*
+ * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
+ * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
+ *
+ * When you add any new common ioctls to the switches above and below,
+ * please ensure they have compatible arguments in compat mode.
+ */
+static int do_vfs_ioctl(struct file *filp, unsigned int fd,
+ unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct inode *inode = file_inode(filp);
+
+ switch (cmd) {
+ case FIOCLEX:
+ set_close_on_exec(fd, 1);
+ return 0;
+
+ case FIONCLEX:
+ set_close_on_exec(fd, 0);
+ return 0;
+
+ case FIONBIO:
+ return ioctl_fionbio(filp, argp);
+
+ case FIOASYNC:
+ return ioctl_fioasync(fd, filp, argp);
+
+ case FIOQSIZE:
+ if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) {
+ loff_t res = inode_get_bytes(inode);
+ return copy_to_user(argp, &res, sizeof(res)) ?
+ -EFAULT : 0;
+ }
+
+ return -ENOTTY;
+
+ case FIFREEZE:
+ return ioctl_fsfreeze(filp);
+
+ case FITHAW:
+ return ioctl_fsthaw(filp);
+
+ case FS_IOC_FIEMAP:
+ return ioctl_fiemap(filp, argp);
+
+ case FIGETBSZ:
+ /* anon_bdev filesystems may not have a block size */
+ if (!inode->i_sb->s_blocksize)
+ return -EINVAL;
+
+ return put_user(inode->i_sb->s_blocksize, (int __user *)argp);
+
+ case FICLONE:
+ return ioctl_file_clone(filp, arg, 0, 0, 0);
+
+ case FICLONERANGE:
+ return ioctl_file_clone_range(filp, argp);
+
+ case FIDEDUPERANGE:
+ return ioctl_file_dedupe_range(filp, argp);
+
+ case FIONREAD:
+ if (!S_ISREG(inode->i_mode))
+ return vfs_ioctl(filp, cmd, arg);
+
+ return put_user(i_size_read(inode) - filp->f_pos,
+ (int __user *)argp);
+
+ case FS_IOC_GETFLAGS:
+ return ioctl_getflags(filp, argp);
+
+ case FS_IOC_SETFLAGS:
+ return ioctl_setflags(filp, argp);
+
+ case FS_IOC_FSGETXATTR:
+ return ioctl_fsgetxattr(filp, argp);
+
+ case FS_IOC_FSSETXATTR:
+ return ioctl_fssetxattr(filp, argp);
+
+ default:
+ if (S_ISREG(inode->i_mode))
+ return file_ioctl(filp, cmd, argp);
+ break;
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
+{
+ struct fd f = fdget(fd);
+ int error;
+
+ if (!f.file)
+ return -EBADF;
+
+ error = security_file_ioctl(f.file, cmd, arg);
+ if (error)
+ goto out;
+
+ error = do_vfs_ioctl(f.file, fd, cmd, arg);
+ if (error == -ENOIOCTLCMD)
+ error = vfs_ioctl(f.file, cmd, arg);
+
+out:
+ fdput(f);
+ return error;
+}
+
+#ifdef CONFIG_COMPAT
+/**
+ * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
+ *
+ * This is not normally called as a function, but instead set in struct
+ * file_operations as
+ *
+ * .compat_ioctl = compat_ptr_ioctl,
+ *
+ * On most architectures, the compat_ptr_ioctl() just passes all arguments
+ * to the corresponding ->ioctl handler. The exception is arch/s390, where
+ * compat_ptr() clears the top bit of a 32-bit pointer value, so user space
+ * pointers to the second 2GB alias the first 2GB, as is the case for
+ * native 32-bit s390 user space.
+ *
+ * The compat_ptr_ioctl() function must therefore be used only with ioctl
+ * functions that either ignore the argument or pass a pointer to a
+ * compatible data type.
+ *
+ * If any ioctl command handled by fops->unlocked_ioctl passes a plain
+ * integer instead of a pointer, or any of the passed data types
+ * is incompatible between 32-bit and 64-bit architectures, a proper
+ * handler is required instead of compat_ptr_ioctl.
+ */
+long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ if (!file->f_op->unlocked_ioctl)
+ return -ENOIOCTLCMD;
+
+ return file->f_op->unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+EXPORT_SYMBOL(compat_ptr_ioctl);
+
+COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
+ compat_ulong_t, arg)
+{
+ struct fd f = fdget(fd);
+ int error;
+
+ if (!f.file)
+ return -EBADF;
+
+ /* RED-PEN how should LSM module know it's handling 32bit? */
+ error = security_file_ioctl(f.file, cmd, arg);
+ if (error)
+ goto out;
+
+ switch (cmd) {
+ /* FICLONE takes an int argument, so don't use compat_ptr() */
+ case FICLONE:
+ error = ioctl_file_clone(f.file, arg, 0, 0, 0);
+ break;
+
+#if defined(CONFIG_X86_64)
+ /* these get messy on amd64 due to alignment differences */
+ case FS_IOC_RESVSP_32:
+ case FS_IOC_RESVSP64_32:
+ error = compat_ioctl_preallocate(f.file, 0, compat_ptr(arg));
+ break;
+ case FS_IOC_UNRESVSP_32:
+ case FS_IOC_UNRESVSP64_32:
+ error = compat_ioctl_preallocate(f.file, FALLOC_FL_PUNCH_HOLE,
+ compat_ptr(arg));
+ break;
+ case FS_IOC_ZERO_RANGE_32:
+ error = compat_ioctl_preallocate(f.file, FALLOC_FL_ZERO_RANGE,
+ compat_ptr(arg));
+ break;
+#endif
+
+ /*
+ * These access 32-bit values anyway so no further handling is
+ * necessary.
+ */
+ case FS_IOC32_GETFLAGS:
+ case FS_IOC32_SETFLAGS:
+ cmd = (cmd == FS_IOC32_GETFLAGS) ?
+ FS_IOC_GETFLAGS : FS_IOC_SETFLAGS;
+ fallthrough;
+ /*
+ * everything else in do_vfs_ioctl() takes either a compatible
+ * pointer argument or no argument -- call it with a modified
+ * argument.
+ */
+ default:
+ error = do_vfs_ioctl(f.file, fd, cmd,
+ (unsigned long)compat_ptr(arg));
+ if (error != -ENOIOCTLCMD)
+ break;
+
+ if (f.file->f_op->compat_ioctl)
+ error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
+ if (error == -ENOIOCTLCMD)
+ error = -ENOTTY;
+ break;
+ }
+
+ out:
+ fdput(f);
+
+ return error;
+}
+#endif