aboutsummaryrefslogtreecommitdiff
path: root/fs/cachefiles/namei.c
diff options
context:
space:
mode:
authorLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
committerLibravatar Linus Torvalds <torvalds@linux-foundation.org>2023-02-21 18:24:12 -0800
commit5b7c4cabbb65f5c469464da6c5f614cbd7f730f2 (patch)
treecc5c2d0a898769fd59549594fedb3ee6f84e59a0 /fs/cachefiles/namei.c
downloadlinux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.tar.gz
linux-5b7c4cabbb65f5c469464da6c5f614cbd7f730f2.zip
Merge tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextgrafted
Pull networking updates from Jakub Kicinski: "Core: - Add dedicated kmem_cache for typical/small skb->head, avoid having to access struct page at kfree time, and improve memory use. - Introduce sysctl to set default RPS configuration for new netdevs. - Define Netlink protocol specification format which can be used to describe messages used by each family and auto-generate parsers. Add tools for generating kernel data structures and uAPI headers. - Expose all net/core sysctls inside netns. - Remove 4s sleep in netpoll if carrier is instantly detected on boot. - Add configurable limit of MDB entries per port, and port-vlan. - Continue populating drop reasons throughout the stack. - Retire a handful of legacy Qdiscs and classifiers. Protocols: - Support IPv4 big TCP (TSO frames larger than 64kB). - Add IP_LOCAL_PORT_RANGE socket option, to control local port range on socket by socket basis. - Track and report in procfs number of MPTCP sockets used. - Support mixing IPv4 and IPv6 flows in the in-kernel MPTCP path manager. - IPv6: don't check net.ipv6.route.max_size and rely on garbage collection to free memory (similarly to IPv4). - Support Penultimate Segment Pop (PSP) flavor in SRv6 (RFC8986). - ICMP: add per-rate limit counters. - Add support for user scanning requests in ieee802154. - Remove static WEP support. - Support minimal Wi-Fi 7 Extremely High Throughput (EHT) rate reporting. - WiFi 7 EHT channel puncturing support (client & AP). BPF: - Add a rbtree data structure following the "next-gen data structure" precedent set by recently added linked list, that is, by using kfunc + kptr instead of adding a new BPF map type. - Expose XDP hints via kfuncs with initial support for RX hash and timestamp metadata. - Add BPF_F_NO_TUNNEL_KEY extension to bpf_skb_set_tunnel_key to better support decap on GRE tunnel devices not operating in collect metadata. - Improve x86 JIT's codegen for PROBE_MEM runtime error checks. - Remove the need for trace_printk_lock for bpf_trace_printk and bpf_trace_vprintk helpers. - Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case. - Significantly reduce the search time for module symbols by livepatch and BPF. - Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals. - Add support for BPF trampoline on s390x and riscv64. - Add capability to export the XDP features supported by the NIC. - Add __bpf_kfunc tag for marking kernel functions as kfuncs. - Add cgroup.memory=nobpf kernel parameter option to disable BPF memory accounting for container environments. Netfilter: - Remove the CLUSTERIP target. It has been marked as obsolete for years, and we still have WARN splats wrt races of the out-of-band /proc interface installed by this target. - Add 'destroy' commands to nf_tables. They are identical to the existing 'delete' commands, but do not return an error if the referenced object (set, chain, rule...) did not exist. Driver API: - Improve cpumask_local_spread() locality to help NICs set the right IRQ affinity on AMD platforms. - Separate C22 and C45 MDIO bus transactions more clearly. - Introduce new DCB table to control DSCP rewrite on egress. - Support configuration of Physical Layer Collision Avoidance (PLCA) Reconciliation Sublayer (RS) (802.3cg-2019). Modern version of shared medium Ethernet. - Support for MAC Merge layer (IEEE 802.3-2018 clause 99). Allowing preemption of low priority frames by high priority frames. - Add support for controlling MACSec offload using netlink SET. - Rework devlink instance refcounts to allow registration and de-registration under the instance lock. Split the code into multiple files, drop some of the unnecessarily granular locks and factor out common parts of netlink operation handling. - Add TX frame aggregation parameters (for USB drivers). - Add a new attr TCA_EXT_WARN_MSG to report TC (offload) warning messages with notifications for debug. - Allow offloading of UDP NEW connections via act_ct. - Add support for per action HW stats in TC. - Support hardware miss to TC action (continue processing in SW from a specific point in the action chain). - Warn if old Wireless Extension user space interface is used with modern cfg80211/mac80211 drivers. Do not support Wireless Extensions for Wi-Fi 7 devices at all. Everyone should switch to using nl80211 interface instead. - Improve the CAN bit timing configuration. Use extack to return error messages directly to user space, update the SJW handling, including the definition of a new default value that will benefit CAN-FD controllers, by increasing their oscillator tolerance. New hardware / drivers: - Ethernet: - nVidia BlueField-3 support (control traffic driver) - Ethernet support for imx93 SoCs - Motorcomm yt8531 gigabit Ethernet PHY - onsemi NCN26000 10BASE-T1S PHY (with support for PLCA) - Microchip LAN8841 PHY (incl. cable diagnostics and PTP) - Amlogic gxl MDIO mux - WiFi: - RealTek RTL8188EU (rtl8xxxu) - Qualcomm Wi-Fi 7 devices (ath12k) - CAN: - Renesas R-Car V4H Drivers: - Bluetooth: - Set Per Platform Antenna Gain (PPAG) for Intel controllers. - Ethernet NICs: - Intel (1G, igc): - support TSN / Qbv / packet scheduling features of i226 model - Intel (100G, ice): - use GNSS subsystem instead of TTY - multi-buffer XDP support - extend support for GPIO pins to E823 devices - nVidia/Mellanox: - update the shared buffer configuration on PFC commands - implement PTP adjphase function for HW offset control - TC support for Geneve and GRE with VF tunnel offload - more efficient crypto key management method - multi-port eswitch support - Netronome/Corigine: - add DCB IEEE support - support IPsec offloading for NFP3800 - Freescale/NXP (enetc): - support XDP_REDIRECT for XDP non-linear buffers - improve reconfig, avoid link flap and waiting for idle - support MAC Merge layer - Other NICs: - sfc/ef100: add basic devlink support for ef100 - ionic: rx_push mode operation (writing descriptors via MMIO) - bnxt: use the auxiliary bus abstraction for RDMA - r8169: disable ASPM and reset bus in case of tx timeout - cpsw: support QSGMII mode for J721e CPSW9G - cpts: support pulse-per-second output - ngbe: add an mdio bus driver - usbnet: optimize usbnet_bh() by avoiding unnecessary queuing - r8152: handle devices with FW with NCM support - amd-xgbe: support 10Mbps, 2.5GbE speeds and rx-adaptation - virtio-net: support multi buffer XDP - virtio/vsock: replace virtio_vsock_pkt with sk_buff - tsnep: XDP support - Ethernet high-speed switches: - nVidia/Mellanox (mlxsw): - add support for latency TLV (in FW control messages) - Microchip (sparx5): - separate explicit and implicit traffic forwarding rules, make the implicit rules always active - add support for egress DSCP rewrite - IS0 VCAP support (Ingress Classification) - IS2 VCAP filters (protos, L3 addrs, L4 ports, flags, ToS etc.) - ES2 VCAP support (Egress Access Control) - support for Per-Stream Filtering and Policing (802.1Q, 8.6.5.1) - Ethernet embedded switches: - Marvell (mv88e6xxx): - add MAB (port auth) offload support - enable PTP receive for mv88e6390 - NXP (ocelot): - support MAC Merge layer - support for the the vsc7512 internal copper phys - Microchip: - lan9303: convert to PHYLINK - lan966x: support TC flower filter statistics - lan937x: PTP support for KSZ9563/KSZ8563 and LAN937x - lan937x: support Credit Based Shaper configuration - ksz9477: support Energy Efficient Ethernet - other: - qca8k: convert to regmap read/write API, use bulk operations - rswitch: Improve TX timestamp accuracy - Intel WiFi (iwlwifi): - EHT (Wi-Fi 7) rate reporting - STEP equalizer support: transfer some STEP (connection to radio on platforms with integrated wifi) related parameters from the BIOS to the firmware. - Qualcomm 802.11ax WiFi (ath11k): - IPQ5018 support - Fine Timing Measurement (FTM) responder role support - channel 177 support - MediaTek WiFi (mt76): - per-PHY LED support - mt7996: EHT (Wi-Fi 7) support - Wireless Ethernet Dispatch (WED) reset support - switch to using page pool allocator - RealTek WiFi (rtw89): - support new version of Bluetooth co-existance - Mobile: - rmnet: support TX aggregation" * tag 'net-next-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1872 commits) page_pool: add a comment explaining the fragment counter usage net: ethtool: fix __ethtool_dev_mm_supported() implementation ethtool: pse-pd: Fix double word in comments xsk: add linux/vmalloc.h to xsk.c sefltests: netdevsim: wait for devlink instance after netns removal selftest: fib_tests: Always cleanup before exit net/mlx5e: Align IPsec ASO result memory to be as required by hardware net/mlx5e: TC, Set CT miss to the specific ct action instance net/mlx5e: Rename CHAIN_TO_REG to MAPPED_OBJ_TO_REG net/mlx5: Refactor tc miss handling to a single function net/mlx5: Kconfig: Make tc offload depend on tc skb extension net/sched: flower: Support hardware miss to tc action net/sched: flower: Move filter handle initialization earlier net/sched: cls_api: Support hardware miss to tc action net/sched: Rename user cookie and act cookie sfc: fix builds without CONFIG_RTC_LIB sfc: clean up some inconsistent indentings net/mlx4_en: Introduce flexible array to silence overflow warning net: lan966x: Fix possible deadlock inside PTP net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). ...
Diffstat (limited to '')
-rw-r--r--fs/cachefiles/namei.c860
1 files changed, 860 insertions, 0 deletions
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
new file mode 100644
index 000000000..82219a8f6
--- /dev/null
+++ b/fs/cachefiles/namei.c
@@ -0,0 +1,860 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* CacheFiles path walking and related routines
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+/*
+ * Mark the backing file as being a cache file if it's not already in use. The
+ * mark tells the culling request command that it's not allowed to cull the
+ * file or directory. The caller must hold the inode lock.
+ */
+static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ bool can_use = false;
+
+ if (!(inode->i_flags & S_KERNEL_FILE)) {
+ inode->i_flags |= S_KERNEL_FILE;
+ trace_cachefiles_mark_active(object, inode);
+ can_use = true;
+ } else {
+ trace_cachefiles_mark_failed(object, inode);
+ }
+
+ return can_use;
+}
+
+static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ bool can_use;
+
+ inode_lock(inode);
+ can_use = __cachefiles_mark_inode_in_use(object, inode);
+ inode_unlock(inode);
+ return can_use;
+}
+
+/*
+ * Unmark a backing inode. The caller must hold the inode lock.
+ */
+static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ inode->i_flags &= ~S_KERNEL_FILE;
+ trace_cachefiles_mark_inactive(object, inode);
+}
+
+static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object,
+ struct inode *inode)
+{
+ inode_lock(inode);
+ __cachefiles_unmark_inode_in_use(object, inode);
+ inode_unlock(inode);
+}
+
+/*
+ * Unmark a backing inode and tell cachefilesd that there's something that can
+ * be culled.
+ */
+void cachefiles_unmark_inode_in_use(struct cachefiles_object *object,
+ struct file *file)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct inode *inode = file_inode(file);
+
+ cachefiles_do_unmark_inode_in_use(object, inode);
+
+ if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
+ atomic_long_add(inode->i_blocks, &cache->b_released);
+ if (atomic_inc_return(&cache->f_released))
+ cachefiles_state_changed(cache);
+ }
+}
+
+/*
+ * get a subdirectory
+ */
+struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ const char *dirname,
+ bool *_is_new)
+{
+ struct dentry *subdir;
+ struct path path;
+ int ret;
+
+ _enter(",,%s", dirname);
+
+ /* search the current directory for the element name */
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+
+retry:
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ subdir = lookup_one_len(dirname, dir, strlen(dirname));
+ else
+ subdir = ERR_PTR(ret);
+ trace_cachefiles_lookup(NULL, dir, subdir);
+ if (IS_ERR(subdir)) {
+ trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
+ PTR_ERR(subdir),
+ cachefiles_trace_lookup_error);
+ if (PTR_ERR(subdir) == -ENOMEM)
+ goto nomem_d_alloc;
+ goto lookup_error;
+ }
+
+ _debug("subdir -> %pd %s",
+ subdir, d_backing_inode(subdir) ? "positive" : "negative");
+
+ /* we need to create the subdir if it doesn't exist yet */
+ if (d_is_negative(subdir)) {
+ ret = cachefiles_has_space(cache, 1, 0,
+ cachefiles_has_space_for_create);
+ if (ret < 0)
+ goto mkdir_error;
+
+ _debug("attempt mkdir");
+
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ ret = security_path_mkdir(&path, subdir, 0700);
+ if (ret < 0)
+ goto mkdir_error;
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
+ cachefiles_trace_mkdir_error);
+ goto mkdir_error;
+ }
+ trace_cachefiles_mkdir(dir, subdir);
+
+ if (unlikely(d_unhashed(subdir))) {
+ cachefiles_put_directory(subdir);
+ goto retry;
+ }
+ ASSERT(d_backing_inode(subdir));
+
+ _debug("mkdir -> %pd{ino=%lu}",
+ subdir, d_backing_inode(subdir)->i_ino);
+ if (_is_new)
+ *_is_new = true;
+ }
+
+ /* Tell rmdir() it's not allowed to delete the subdir */
+ inode_lock(d_inode(subdir));
+ inode_unlock(d_inode(dir));
+
+ if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ subdir, d_inode(subdir)->i_ino);
+ goto mark_error;
+ }
+
+ inode_unlock(d_inode(subdir));
+
+ /* we need to make sure the subdir is a directory */
+ ASSERT(d_backing_inode(subdir));
+
+ if (!d_can_lookup(subdir)) {
+ pr_err("%s is not a directory\n", dirname);
+ ret = -EIO;
+ goto check_error;
+ }
+
+ ret = -EPERM;
+ if (!(d_backing_inode(subdir)->i_opflags & IOP_XATTR) ||
+ !d_backing_inode(subdir)->i_op->lookup ||
+ !d_backing_inode(subdir)->i_op->mkdir ||
+ !d_backing_inode(subdir)->i_op->rename ||
+ !d_backing_inode(subdir)->i_op->rmdir ||
+ !d_backing_inode(subdir)->i_op->unlink)
+ goto check_error;
+
+ _leave(" = [%lu]", d_backing_inode(subdir)->i_ino);
+ return subdir;
+
+check_error:
+ cachefiles_put_directory(subdir);
+ _leave(" = %d [check]", ret);
+ return ERR_PTR(ret);
+
+mark_error:
+ inode_unlock(d_inode(subdir));
+ dput(subdir);
+ return ERR_PTR(-EBUSY);
+
+mkdir_error:
+ inode_unlock(d_inode(dir));
+ dput(subdir);
+ pr_err("mkdir %s failed with error %d\n", dirname, ret);
+ return ERR_PTR(ret);
+
+lookup_error:
+ inode_unlock(d_inode(dir));
+ ret = PTR_ERR(subdir);
+ pr_err("Lookup %s failed with error %d\n", dirname, ret);
+ return ERR_PTR(ret);
+
+nomem_d_alloc:
+ inode_unlock(d_inode(dir));
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Put a subdirectory.
+ */
+void cachefiles_put_directory(struct dentry *dir)
+{
+ if (dir) {
+ cachefiles_do_unmark_inode_in_use(NULL, d_inode(dir));
+ dput(dir);
+ }
+}
+
+/*
+ * Remove a regular file from the cache.
+ */
+static int cachefiles_unlink(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dir, struct dentry *dentry,
+ enum fscache_why_object_killed why)
+{
+ struct path path = {
+ .mnt = cache->mnt,
+ .dentry = dir,
+ };
+ int ret;
+
+ trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
+ ret = security_path_unlink(&path, dentry);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Unlink security error");
+ return ret;
+ }
+
+ ret = cachefiles_inject_remove_error();
+ if (ret == 0) {
+ ret = vfs_unlink(&nop_mnt_idmap, d_backing_inode(dir), dentry, NULL);
+ if (ret == -EIO)
+ cachefiles_io_error(cache, "Unlink failed");
+ }
+ if (ret != 0)
+ trace_cachefiles_vfs_error(object, d_backing_inode(dir), ret,
+ cachefiles_trace_unlink_error);
+ return ret;
+}
+
+/*
+ * Delete an object representation from the cache
+ * - File backed objects are unlinked
+ * - Directory backed objects are stuffed into the graveyard for userspace to
+ * delete
+ */
+int cachefiles_bury_object(struct cachefiles_cache *cache,
+ struct cachefiles_object *object,
+ struct dentry *dir,
+ struct dentry *rep,
+ enum fscache_why_object_killed why)
+{
+ struct dentry *grave, *trap;
+ struct path path, path_to_graveyard;
+ char nbuffer[8 + 8 + 1];
+ int ret;
+
+ _enter(",'%pd','%pd'", dir, rep);
+
+ if (rep->d_parent != dir) {
+ inode_unlock(d_inode(dir));
+ _leave(" = -ESTALE");
+ return -ESTALE;
+ }
+
+ /* non-directories can just be unlinked */
+ if (!d_is_dir(rep)) {
+ dget(rep); /* Stop the dentry being negated if it's only pinned
+ * by a file struct.
+ */
+ ret = cachefiles_unlink(cache, object, dir, rep, why);
+ dput(rep);
+
+ inode_unlock(d_inode(dir));
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ /* directories have to be moved to the graveyard */
+ _debug("move stale object to graveyard");
+ inode_unlock(d_inode(dir));
+
+try_again:
+ /* first step is to make up a grave dentry in the graveyard */
+ sprintf(nbuffer, "%08x%08x",
+ (uint32_t) ktime_get_real_seconds(),
+ (uint32_t) atomic_inc_return(&cache->gravecounter));
+
+ /* do the multiway lock magic */
+ trap = lock_rename(cache->graveyard, dir);
+
+ /* do some checks before getting the grave dentry */
+ if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
+ /* the entry was probably culled when we dropped the parent dir
+ * lock */
+ unlock_rename(cache->graveyard, dir);
+ _leave(" = 0 [culled?]");
+ return 0;
+ }
+
+ if (!d_can_lookup(cache->graveyard)) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "Graveyard no longer a directory");
+ return -EIO;
+ }
+
+ if (trap == rep) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "May not make directory loop");
+ return -EIO;
+ }
+
+ if (d_mountpoint(rep)) {
+ unlock_rename(cache->graveyard, dir);
+ cachefiles_io_error(cache, "Mountpoint in cache");
+ return -EIO;
+ }
+
+ grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
+ if (IS_ERR(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ trace_cachefiles_vfs_error(object, d_inode(cache->graveyard),
+ PTR_ERR(grave),
+ cachefiles_trace_lookup_error);
+
+ if (PTR_ERR(grave) == -ENOMEM) {
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+ }
+
+ cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave));
+ return -EIO;
+ }
+
+ if (d_is_positive(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ grave = NULL;
+ cond_resched();
+ goto try_again;
+ }
+
+ if (d_mountpoint(grave)) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ cachefiles_io_error(cache, "Mountpoint in graveyard");
+ return -EIO;
+ }
+
+ /* target should not be an ancestor of source */
+ if (trap == grave) {
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ cachefiles_io_error(cache, "May not make directory loop");
+ return -EIO;
+ }
+
+ /* attempt the rename */
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ path_to_graveyard.mnt = cache->mnt;
+ path_to_graveyard.dentry = cache->graveyard;
+ ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Rename security error %d", ret);
+ } else {
+ struct renamedata rd = {
+ .old_mnt_idmap = &nop_mnt_idmap,
+ .old_dir = d_inode(dir),
+ .old_dentry = rep,
+ .new_mnt_idmap = &nop_mnt_idmap,
+ .new_dir = d_inode(cache->graveyard),
+ .new_dentry = grave,
+ };
+ trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ ret = vfs_rename(&rd);
+ if (ret != 0)
+ trace_cachefiles_vfs_error(object, d_inode(dir), ret,
+ cachefiles_trace_rename_error);
+ if (ret != 0 && ret != -ENOMEM)
+ cachefiles_io_error(cache,
+ "Rename failed with error %d", ret);
+ }
+
+ __cachefiles_unmark_inode_in_use(object, d_inode(rep));
+ unlock_rename(cache->graveyard, dir);
+ dput(grave);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Delete a cache file.
+ */
+int cachefiles_delete_object(struct cachefiles_object *object,
+ enum fscache_why_object_killed why)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *dentry = object->file->f_path.dentry;
+ struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash];
+ int ret;
+
+ _enter(",OBJ%x{%pD}", object->debug_id, object->file);
+
+ /* Stop the dentry being negated if it's only pinned by a file struct. */
+ dget(dentry);
+
+ inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT);
+ ret = cachefiles_unlink(volume->cache, object, fan, dentry, why);
+ inode_unlock(d_backing_inode(fan));
+ dput(dentry);
+ return ret;
+}
+
+/*
+ * Create a temporary file and leave it unattached and un-xattr'd until the
+ * time comes to discard the object from memory.
+ */
+struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct cachefiles_cache *cache = volume->cache;
+ const struct cred *saved_cred;
+ struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash];
+ struct file *file;
+ const struct path parentpath = { .mnt = cache->mnt, .dentry = fan };
+ uint64_t ni_size;
+ long ret;
+
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ ret = cachefiles_inject_write_error();
+ if (ret == 0) {
+ file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG,
+ O_RDWR | O_LARGEFILE | O_DIRECT,
+ cache->cache_cred);
+ ret = PTR_ERR_OR_ZERO(file);
+ }
+ if (ret) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), ret,
+ cachefiles_trace_tmpfile_error);
+ if (ret == -EIO)
+ cachefiles_io_error_obj(object, "Failed to create tmpfile");
+ goto err;
+ }
+
+ trace_cachefiles_tmpfile(object, file_inode(file));
+
+ /* This is a newly created file with no other possible user */
+ if (!cachefiles_mark_inode_in_use(object, file_inode(file)))
+ WARN_ON(1);
+
+ ret = cachefiles_ondemand_init_object(object);
+ if (ret < 0)
+ goto err_unuse;
+
+ ni_size = object->cookie->object_size;
+ ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE);
+
+ if (ni_size > 0) {
+ trace_cachefiles_trunc(object, file_inode(file), 0, ni_size,
+ cachefiles_trunc_expand_tmpfile);
+ ret = cachefiles_inject_write_error();
+ if (ret == 0)
+ ret = vfs_truncate(&file->f_path, ni_size);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(
+ object, file_inode(file), ret,
+ cachefiles_trace_trunc_error);
+ goto err_unuse;
+ }
+ }
+
+ ret = -EINVAL;
+ if (unlikely(!file->f_op->read_iter) ||
+ unlikely(!file->f_op->write_iter)) {
+ fput(file);
+ pr_notice("Cache does not support read_iter and write_iter\n");
+ goto err_unuse;
+ }
+out:
+ cachefiles_end_secure(cache, saved_cred);
+ return file;
+
+err_unuse:
+ cachefiles_do_unmark_inode_in_use(object, file_inode(file));
+ fput(file);
+err:
+ file = ERR_PTR(ret);
+ goto out;
+}
+
+/*
+ * Create a new file.
+ */
+static bool cachefiles_create_file(struct cachefiles_object *object)
+{
+ struct file *file;
+ int ret;
+
+ ret = cachefiles_has_space(object->volume->cache, 1, 0,
+ cachefiles_has_space_for_create);
+ if (ret < 0)
+ return false;
+
+ file = cachefiles_create_tmpfile(object);
+ if (IS_ERR(file))
+ return false;
+
+ set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags);
+ set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
+ _debug("create -> %pD{ino=%lu}", file, file_inode(file)->i_ino);
+ object->file = file;
+ return true;
+}
+
+/*
+ * Open an existing file, checking its attributes and replacing it if it is
+ * stale.
+ */
+static bool cachefiles_open_file(struct cachefiles_object *object,
+ struct dentry *dentry)
+{
+ struct cachefiles_cache *cache = object->volume->cache;
+ struct file *file;
+ struct path path;
+ int ret;
+
+ _enter("%pd", dentry);
+
+ if (!cachefiles_mark_inode_in_use(object, d_inode(dentry))) {
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, d_inode(dentry)->i_ino);
+ return false;
+ }
+
+ /* We need to open a file interface onto a data file now as we can't do
+ * it on demand because writeback called from do_exit() sees
+ * current->fs == NULL - which breaks d_path() called from ext4 open.
+ */
+ path.mnt = cache->mnt;
+ path.dentry = dentry;
+ file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
+ d_backing_inode(dentry), cache->cache_cred);
+ if (IS_ERR(file)) {
+ trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
+ PTR_ERR(file),
+ cachefiles_trace_open_error);
+ goto error;
+ }
+
+ if (unlikely(!file->f_op->read_iter) ||
+ unlikely(!file->f_op->write_iter)) {
+ pr_notice("Cache does not support read_iter and write_iter\n");
+ goto error_fput;
+ }
+ _debug("file -> %pd positive", dentry);
+
+ ret = cachefiles_ondemand_init_object(object);
+ if (ret < 0)
+ goto error_fput;
+
+ ret = cachefiles_check_auxdata(object, file);
+ if (ret < 0)
+ goto check_failed;
+
+ object->file = file;
+
+ /* Always update the atime on an object we've just looked up (this is
+ * used to keep track of culling, and atimes are only updated by read,
+ * write and readdir but not lookup or open).
+ */
+ touch_atime(&file->f_path);
+ dput(dentry);
+ return true;
+
+check_failed:
+ fscache_cookie_lookup_negative(object->cookie);
+ cachefiles_unmark_inode_in_use(object, file);
+ fput(file);
+ dput(dentry);
+ if (ret == -ESTALE)
+ return cachefiles_create_file(object);
+ return false;
+
+error_fput:
+ fput(file);
+error:
+ cachefiles_do_unmark_inode_in_use(object, d_inode(dentry));
+ dput(dentry);
+ return false;
+}
+
+/*
+ * walk from the parent object to the child object through the backing
+ * filesystem, creating directories as we go
+ */
+bool cachefiles_look_up_object(struct cachefiles_object *object)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash];
+ int ret;
+
+ _enter("OBJ%x,%s,", object->debug_id, object->d_name);
+
+ /* Look up path "cache/vol/fanout/file". */
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ dentry = lookup_positive_unlocked(object->d_name, fan,
+ object->d_name_len);
+ else
+ dentry = ERR_PTR(ret);
+ trace_cachefiles_lookup(object, fan, dentry);
+ if (IS_ERR(dentry)) {
+ if (dentry == ERR_PTR(-ENOENT))
+ goto new_file;
+ if (dentry == ERR_PTR(-EIO))
+ cachefiles_io_error_obj(object, "Lookup failed");
+ return false;
+ }
+
+ if (!d_is_reg(dentry)) {
+ pr_err("%pd is not a file\n", dentry);
+ inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
+ ret = cachefiles_bury_object(volume->cache, object, fan, dentry,
+ FSCACHE_OBJECT_IS_WEIRD);
+ dput(dentry);
+ if (ret < 0)
+ return false;
+ goto new_file;
+ }
+
+ if (!cachefiles_open_file(object, dentry))
+ return false;
+
+ _leave(" = t [%lu]", file_inode(object->file)->i_ino);
+ return true;
+
+new_file:
+ fscache_cookie_lookup_negative(object->cookie);
+ return cachefiles_create_file(object);
+}
+
+/*
+ * Attempt to link a temporary file into its rightful place in the cache.
+ */
+bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
+ struct cachefiles_object *object)
+{
+ struct cachefiles_volume *volume = object->volume;
+ struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash];
+ bool success = false;
+ int ret;
+
+ _enter(",%pD", object->file);
+
+ inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ dentry = lookup_one_len(object->d_name, fan, object->d_name_len);
+ else
+ dentry = ERR_PTR(ret);
+ if (IS_ERR(dentry)) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
+ cachefiles_trace_lookup_error);
+ _debug("lookup fail %ld", PTR_ERR(dentry));
+ goto out_unlock;
+ }
+
+ if (!d_is_negative(dentry)) {
+ if (d_backing_inode(dentry) == file_inode(object->file)) {
+ success = true;
+ goto out_dput;
+ }
+
+ ret = cachefiles_unlink(volume->cache, object, fan, dentry,
+ FSCACHE_OBJECT_IS_STALE);
+ if (ret < 0)
+ goto out_dput;
+
+ dput(dentry);
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ dentry = lookup_one_len(object->d_name, fan, object->d_name_len);
+ else
+ dentry = ERR_PTR(ret);
+ if (IS_ERR(dentry)) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
+ cachefiles_trace_lookup_error);
+ _debug("lookup fail %ld", PTR_ERR(dentry));
+ goto out_unlock;
+ }
+ }
+
+ ret = cachefiles_inject_read_error();
+ if (ret == 0)
+ ret = vfs_link(object->file->f_path.dentry, &nop_mnt_idmap,
+ d_inode(fan), dentry, NULL);
+ if (ret < 0) {
+ trace_cachefiles_vfs_error(object, d_inode(fan), ret,
+ cachefiles_trace_link_error);
+ _debug("link fail %d", ret);
+ } else {
+ trace_cachefiles_link(object, file_inode(object->file));
+ spin_lock(&object->lock);
+ /* TODO: Do we want to switch the file pointer to the new dentry? */
+ clear_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags);
+ spin_unlock(&object->lock);
+ success = true;
+ }
+
+out_dput:
+ dput(dentry);
+out_unlock:
+ inode_unlock(d_inode(fan));
+ _leave(" = %u", success);
+ return success;
+}
+
+/*
+ * Look up an inode to be checked or culled. Return -EBUSY if the inode is
+ * marked in use.
+ */
+static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
+ struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ int ret = -ENOENT;
+
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+
+ victim = lookup_one_len(filename, dir, strlen(filename));
+ if (IS_ERR(victim))
+ goto lookup_error;
+ if (d_is_negative(victim))
+ goto lookup_put;
+ if (d_inode(victim)->i_flags & S_KERNEL_FILE)
+ goto lookup_busy;
+ return victim;
+
+lookup_busy:
+ ret = -EBUSY;
+lookup_put:
+ inode_unlock(d_inode(dir));
+ dput(victim);
+ return ERR_PTR(ret);
+
+lookup_error:
+ inode_unlock(d_inode(dir));
+ ret = PTR_ERR(victim);
+ if (ret == -ENOENT)
+ return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
+
+ if (ret == -EIO) {
+ cachefiles_io_error(cache, "Lookup failed");
+ } else if (ret != -ENOMEM) {
+ pr_err("Internal error: %d\n", ret);
+ ret = -EIO;
+ }
+
+ return ERR_PTR(ret);
+}
+
+/*
+ * Cull an object if it's not in use
+ * - called only by cache manager daemon
+ */
+int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ struct inode *inode;
+ int ret;
+
+ _enter(",%pd/,%s", dir, filename);
+
+ victim = cachefiles_lookup_for_cull(cache, dir, filename);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
+
+ /* check to see if someone is using this object */
+ inode = d_inode(victim);
+ inode_lock(inode);
+ if (inode->i_flags & S_KERNEL_FILE) {
+ ret = -EBUSY;
+ } else {
+ /* Stop the cache from picking it back up */
+ inode->i_flags |= S_KERNEL_FILE;
+ ret = 0;
+ }
+ inode_unlock(inode);
+ if (ret < 0)
+ goto error_unlock;
+
+ ret = cachefiles_bury_object(cache, NULL, dir, victim,
+ FSCACHE_OBJECT_WAS_CULLED);
+ if (ret < 0)
+ goto error;
+
+ fscache_count_culled();
+ dput(victim);
+ _leave(" = 0");
+ return 0;
+
+error_unlock:
+ inode_unlock(d_inode(dir));
+error:
+ dput(victim);
+ if (ret == -ENOENT)
+ return -ESTALE; /* Probably got retired by the netfs */
+
+ if (ret != -ENOMEM) {
+ pr_err("Internal error: %d\n", ret);
+ ret = -EIO;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Find out if an object is in use or not
+ * - called only by cache manager daemon
+ * - returns -EBUSY or 0 to indicate whether an object is in use or not
+ */
+int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
+ char *filename)
+{
+ struct dentry *victim;
+ int ret = 0;
+
+ victim = cachefiles_lookup_for_cull(cache, dir, filename);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
+
+ inode_unlock(d_inode(dir));
+ dput(victim);
+ return ret;
+}