diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-08 21:40:30 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-08 21:40:30 -0700 |
commit | c6b80eb89b55590b12db11103913088735205b5c (patch) | |
tree | 22182f07ee9ab31f5b8357531af91d90fad925d0 /fs | |
parent | 9744b923d50810bb489e49bfe89d0b4d5c84be31 (diff) | |
parent | 2eda9eaa6d7ec129150df4c7b7be65f27ac47346 (diff) |
Merge tag 'ovl-update-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
Pull overlayfs update from Miklos Szeredi:
- Fix failure to copy-up files from certain NFSv4 mounts
- Sort out inconsistencies between st_ino and i_ino (used in /proc/locks)
- Allow consistent (POSIX-y) inode numbering in more cases
- Allow virtiofs to be used as upper layer
- Miscellaneous cleanups and fixes
* tag 'ovl-update-5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
ovl: document xino expected behavior
ovl: enable xino automatically in more cases
ovl: avoid possible inode number collisions with xino=on
ovl: use a private non-persistent ino pool
ovl: fix WARN_ON nlink drop to zero
ovl: fix a typo in comment
ovl: replace zero-length array with flexible-array member
ovl: ovl_obtain_alias(): don't call d_instantiate_anon() for old
ovl: strict upper fs requirements for remote upper fs
ovl: check if upper fs supports RENAME_WHITEOUT
ovl: allow remote upper
ovl: decide if revalidate needed on a per-dentry basis
ovl: separate detection of remote upper layer from stacked overlay
ovl: restructure dentry revalidation
ovl: ignore failure to copy up unknown xattrs
ovl: document permission model
ovl: simplify i_ino initialization
ovl: factor out helper ovl_get_root()
ovl: fix out of date comment and unreachable code
ovl: fix value of i_ino for lower hardlink corner case
Diffstat (limited to 'fs')
-rw-r--r-- | fs/overlayfs/copy_up.c | 16 | ||||
-rw-r--r-- | fs/overlayfs/dir.c | 31 | ||||
-rw-r--r-- | fs/overlayfs/export.c | 40 | ||||
-rw-r--r-- | fs/overlayfs/inode.c | 99 | ||||
-rw-r--r-- | fs/overlayfs/namei.c | 5 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 25 | ||||
-rw-r--r-- | fs/overlayfs/ovl_entry.h | 2 | ||||
-rw-r--r-- | fs/overlayfs/readdir.c | 25 | ||||
-rw-r--r-- | fs/overlayfs/super.c | 258 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 40 |
10 files changed, 380 insertions, 161 deletions
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9fc47c2e078d..9709cf22cab3 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -36,6 +36,13 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param) module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing"); +static bool ovl_must_copy_xattr(const char *name) +{ + return !strcmp(name, XATTR_POSIX_ACL_ACCESS) || + !strcmp(name, XATTR_POSIX_ACL_DEFAULT) || + !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); +} + int ovl_copy_xattr(struct dentry *old, struct dentry *new) { ssize_t list_size, size, value_size = 0; @@ -107,8 +114,13 @@ retry: continue; /* Discard */ } error = vfs_setxattr(new, name, value, size, 0); - if (error) - break; + if (error) { + if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name)) + break; + + /* Ignore failure to copy unknown xattrs */ + error = 0; + } } kfree(value); out: diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 8e57d5372b8f..279009dee366 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -42,7 +42,7 @@ int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) return err; } -static struct dentry *ovl_lookup_temp(struct dentry *workdir) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -243,6 +243,9 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, ovl_dir_modified(dentry->d_parent, false); ovl_dentry_set_upper_alias(dentry); + ovl_dentry_update_reval(dentry, newdentry, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + if (!hardlink) { /* * ovl_obtain_alias() can be called after ovl_create_real() @@ -819,6 +822,28 @@ static bool ovl_pure_upper(struct dentry *dentry) !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); } +static void ovl_drop_nlink(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + struct dentry *alias; + + /* Try to find another, hashed alias */ + spin_lock(&inode->i_lock); + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + if (alias != dentry && !d_unhashed(alias)) + break; + } + spin_unlock(&inode->i_lock); + + /* + * Changes to underlying layers may cause i_nlink to lose sync with + * reality. In this case prevent the link count from going to zero + * prematurely. + */ + if (inode->i_nlink > !!alias) + drop_nlink(inode); +} + static int ovl_do_remove(struct dentry *dentry, bool is_dir) { int err; @@ -856,7 +881,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (is_dir) clear_nlink(dentry->d_inode); else - drop_nlink(dentry->d_inode); + ovl_drop_nlink(dentry); } ovl_nlink_end(dentry); @@ -1201,7 +1226,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (new_is_dir) clear_nlink(d_inode(new)); else - drop_nlink(d_inode(new)); + ovl_drop_nlink(new); } ovl_dir_modified(old->d_parent, ovl_type_origin(old) || diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 6f54d70cef27..475c61f53f0f 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -308,29 +308,35 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, ovl_set_flag(OVL_UPPERDATA, inode); dentry = d_find_any_alias(inode); - if (!dentry) { - dentry = d_alloc_anon(inode->i_sb); - if (!dentry) - goto nomem; - oe = ovl_alloc_entry(lower ? 1 : 0); - if (!oe) - goto nomem; - - if (lower) { - oe->lowerstack->dentry = dget(lower); - oe->lowerstack->layer = lowerpath->layer; - } - dentry->d_fsdata = oe; - if (upper_alias) - ovl_dentry_set_upper_alias(dentry); + if (dentry) + goto out_iput; + + dentry = d_alloc_anon(inode->i_sb); + if (unlikely(!dentry)) + goto nomem; + oe = ovl_alloc_entry(lower ? 1 : 0); + if (!oe) + goto nomem; + + if (lower) { + oe->lowerstack->dentry = dget(lower); + oe->lowerstack->layer = lowerpath->layer; } + dentry->d_fsdata = oe; + if (upper_alias) + ovl_dentry_set_upper_alias(dentry); + + ovl_dentry_update_reval(dentry, upper, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); return d_instantiate_anon(dentry, inode); nomem: - iput(inode); dput(dentry); - return ERR_PTR(-ENOMEM); + dentry = ERR_PTR(-ENOMEM); +out_iput: + iput(inode); + return dentry; } /* Get the upper or lower dentry in stach whose on layer @idx */ diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 79e8994e3bc1..b0d42ece4d7c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -79,6 +79,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) { bool samefs = ovl_same_fs(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb); + unsigned int xinoshift = 64 - xinobits; if (samefs) { /* @@ -89,22 +90,22 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) stat->dev = dentry->d_sb->s_dev; return 0; } else if (xinobits) { - unsigned int shift = 64 - xinobits; /* * All inode numbers of underlying fs should not be using the * high xinobits, so we use high xinobits to partition the * overlay st_ino address space. The high bits holds the fsid - * (upper fsid is 0). This way overlay inode numbers are unique - * and all inodes use overlay st_dev. Inode numbers are also - * persistent for a given layer configuration. + * (upper fsid is 0). The lowest xinobit is reserved for mapping + * the non-peresistent inode numbers range in case of overflow. + * This way all overlay inode numbers are unique and use the + * overlay st_dev. */ - if (stat->ino >> shift) { - pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", - dentry, stat->ino, xinobits); - } else { - stat->ino |= ((u64)fsid) << shift; + if (likely(!(stat->ino >> xinoshift))) { + stat->ino |= ((u64)fsid) << (xinoshift + 1); stat->dev = dentry->d_sb->s_dev; return 0; + } else if (ovl_xino_warn(dentry->d_sb)) { + pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", + dentry, stat->ino, xinobits); } } @@ -504,7 +505,7 @@ static const struct address_space_operations ovl_aops = { /* * It is possible to stack overlayfs instance on top of another - * overlayfs instance as lower layer. We need to annonate the + * overlayfs instance as lower layer. We need to annotate the * stackable i_mutex locks according to stack level of the super * block instance. An overlayfs instance can never be in stack * depth 0 (there is always a real fs below it). An overlayfs @@ -561,27 +562,73 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) #endif } -static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, - unsigned long ino, int fsid) +static void ovl_next_ino(struct inode *inode) +{ + struct ovl_fs *ofs = inode->i_sb->s_fs_info; + + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); + if (unlikely(!inode->i_ino)) + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); +} + +static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) { int xinobits = ovl_xino_bits(inode->i_sb); + unsigned int xinoshift = 64 - xinobits; /* * When d_ino is consistent with st_ino (samefs or i_ino has enough * bits to encode layer), set the same value used for st_ino to i_ino, * so inode number exposed via /proc/locks and a like will be * consistent with d_ino and st_ino values. An i_ino value inconsistent - * with d_ino also causes nfsd readdirplus to fail. When called from - * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real - * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). + * with d_ino also causes nfsd readdirplus to fail. */ - if (ovl_same_dev(inode->i_sb)) { - inode->i_ino = ino; - if (xinobits && fsid && !(ino >> (64 - xinobits))) - inode->i_ino |= (unsigned long)fsid << (64 - xinobits); - } else { - inode->i_ino = get_next_ino(); + inode->i_ino = ino; + if (ovl_same_fs(inode->i_sb)) { + return; + } else if (xinobits && likely(!(ino >> xinoshift))) { + inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); + return; + } + + /* + * For directory inodes on non-samefs with xino disabled or xino + * overflow, we allocate a non-persistent inode number, to be used for + * resolving st_ino collisions in ovl_map_dev_ino(). + * + * To avoid ino collision with legitimate xino values from upper + * layer (fsid 0), use the lowest xinobit to map the non + * persistent inode numbers to the unified st_ino address space. + */ + if (S_ISDIR(inode->i_mode)) { + ovl_next_ino(inode); + if (xinobits) { + inode->i_ino &= ~0UL >> xinobits; + inode->i_ino |= 1UL << xinoshift; + } } +} + +void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, + unsigned long ino, int fsid) +{ + struct inode *realinode; + + if (oip->upperdentry) + OVL_I(inode)->__upperdentry = oip->upperdentry; + if (oip->lowerpath && oip->lowerpath->dentry) + OVL_I(inode)->lower = igrab(d_inode(oip->lowerpath->dentry)); + if (oip->lowerdata) + OVL_I(inode)->lowerdata = igrab(d_inode(oip->lowerdata)); + + realinode = ovl_inode_real(inode); + ovl_copyattr(realinode, inode); + ovl_copyflags(realinode, inode); + ovl_map_ino(inode, ino, fsid); +} + +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) +{ inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; #ifdef CONFIG_FS_POSIX_ACL @@ -719,7 +766,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) inode = new_inode(sb); if (inode) - ovl_fill_inode(inode, mode, rdev, 0, 0); + ovl_fill_inode(inode, mode, rdev); return inode; } @@ -891,7 +938,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, oip->index); - int fsid = bylower ? oip->lowerpath->layer->fsid : 0; + int fsid = bylower ? lowerpath->layer->fsid : 0; bool is_dir, metacopy = false; unsigned long ino = 0; int err = oip->newinode ? -EEXIST : -ENOMEM; @@ -941,9 +988,11 @@ struct inode *ovl_get_inode(struct super_block *sb, err = -ENOMEM; goto out_err; } + ino = realinode->i_ino; + fsid = lowerpath->layer->fsid; } - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); - ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_inode_init(inode, oip, ino, fsid); if (upperdentry && ovl_is_impuredir(upperdentry)) ovl_set_flag(OVL_IMPURE, inode); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index ed9e129fae04..0db23baf98e7 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -845,7 +845,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (err) goto out; - if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) { + if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) { dput(upperdentry); err = -EREMOTE; goto out; @@ -1076,6 +1076,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out_free_oe; } + ovl_dentry_update_reval(dentry, upperdentry, + DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + revert_creds(old_cred); if (origin_path) { dput(origin_path->dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3d3f2b8bdae5..e6f3670146ed 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -48,6 +48,12 @@ enum ovl_entry_flag { OVL_E_CONNECTED, }; +enum { + OVL_XINO_OFF, + OVL_XINO_AUTO, + OVL_XINO_ON, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -87,7 +93,7 @@ struct ovl_fb { u8 flags; /* OVL_FH_FLAG_* */ u8 type; /* fid_type of fid */ uuid_t uuid; /* uuid of filesystem */ - u32 fid[0]; /* file identifier should be 32bit aligned in-memory */ + u32 fid[]; /* file identifier should be 32bit aligned in-memory */ } __packed; /* In-memory and on-wire format for overlay file handle */ @@ -230,6 +236,8 @@ bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); @@ -264,8 +272,6 @@ void ovl_set_upperdata(struct inode *inode); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); -void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *lowerdata); void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry *dentry, bool impurity); u64 ovl_dentry_version_get(struct dentry *dentry); @@ -301,6 +307,16 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); } +/* + * With xino=auto, we do best effort to keep all inodes on same st_dev and + * d_ino consistent with st_ino. + * With xino=on, we do the same effort but we warn if we failed. + */ +static inline bool ovl_xino_warn(struct super_block *sb) +{ + return OVL_FS(sb)->config.xino == OVL_XINO_ON; +} + /* All layers on same fs? */ static inline bool ovl_same_fs(struct super_block *sb) { @@ -410,6 +426,8 @@ struct ovl_inode_params { char *redirect; struct dentry *lowerdata; }; +void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, + unsigned long ino, int fsid); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper); @@ -451,6 +469,7 @@ struct ovl_cattr { struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); int ovl_cleanup(struct inode *dir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 89015ea822e7..5762d802fe01 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -75,6 +75,8 @@ struct ovl_fs { struct inode *indexdir_trap; /* -1: disabled, 0: same fs, 1..32: number of unused ino bits */ int xino_mode; + /* For allocation of non-persistent inode numbers */ + atomic_long_t last_ino; }; static inline struct ovl_fs *OVL_FS(struct super_block *sb) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 40ac9ce2465a..e452ff7d583d 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -438,15 +438,23 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) /* Map inode number to lower fs unique range */ static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid, - const char *name, int namelen) + const char *name, int namelen, bool warn) { - if (ino >> (64 - xinobits)) { - pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", - namelen, name, ino, xinobits); + unsigned int xinoshift = 64 - xinobits; + + if (unlikely(ino >> xinoshift)) { + if (warn) { + pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n", + namelen, name, ino, xinobits); + } return ino; } - return ino | ((u64)fsid) << (64 - xinobits); + /* + * The lowest xinobit is reserved for mapping the non-peresistent inode + * numbers range, but this range is only exposed via st_ino, not here. + */ + return ino | ((u64)fsid) << (xinoshift + 1); } /* @@ -515,7 +523,8 @@ get: } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, ovl_layer_lower(this)->fsid, - p->name, p->len); + p->name, p->len, + ovl_xino_warn(dir->d_sb)); } out: @@ -645,6 +654,7 @@ struct ovl_readdir_translate { u64 parent_ino; int fsid; int xinobits; + bool xinowarn; }; static int ovl_fill_real(struct dir_context *ctx, const char *name, @@ -665,7 +675,7 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name, ino = p->ino; } else if (rdt->xinobits) { ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid, - name, namelen); + name, namelen, rdt->xinowarn); } return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type); @@ -696,6 +706,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) .ctx.actor = ovl_fill_real, .orig_ctx = ctx, .xinobits = ovl_xino_bits(dir->d_sb), + .xinowarn = ovl_xino_warn(dir->d_sb), }; if (rdt.xinobits && lower_layer) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ac967f1cb6e5..732ad5495c92 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -113,53 +113,54 @@ bug: return dentry; } -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) { - struct ovl_entry *oe = dentry->d_fsdata; - unsigned int i; int ret = 1; - for (i = 0; i < oe->numlower; i++) { - struct dentry *d = oe->lowerstack[i].dentry; - - if (d->d_flags & DCACHE_OP_REVALIDATE) { - ret = d->d_op->d_revalidate(d, flags); - if (ret < 0) - return ret; - if (!ret) { - if (!(flags & LOOKUP_RCU)) - d_invalidate(d); - return -ESTALE; - } + if (weak) { + if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) + ret = d->d_op->d_weak_revalidate(d, flags); + } else if (d->d_flags & DCACHE_OP_REVALIDATE) { + ret = d->d_op->d_revalidate(d, flags); + if (!ret) { + if (!(flags & LOOKUP_RCU)) + d_invalidate(d); + ret = -ESTALE; } } - return 1; + return ret; } -static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_dentry_revalidate_common(struct dentry *dentry, + unsigned int flags, bool weak) { struct ovl_entry *oe = dentry->d_fsdata; + struct dentry *upper; unsigned int i; int ret = 1; - for (i = 0; i < oe->numlower; i++) { - struct dentry *d = oe->lowerstack[i].dentry; + upper = ovl_dentry_upper(dentry); + if (upper) + ret = ovl_revalidate_real(upper, flags, weak); - if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { - ret = d->d_op->d_weak_revalidate(d, flags); - if (ret <= 0) - break; - } + for (i = 0; ret > 0 && i < oe->numlower; i++) { + ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags, + weak); } return ret; } -static const struct dentry_operations ovl_dentry_operations = { - .d_release = ovl_dentry_release, - .d_real = ovl_d_real, -}; +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + return ovl_dentry_revalidate_common(dentry, flags, false); +} + +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + return ovl_dentry_revalidate_common(dentry, flags, true); +} -static const struct dentry_operations ovl_reval_dentry_operations = { +static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, @@ -316,12 +317,6 @@ static const char *ovl_redirect_mode_def(void) return ovl_redirect_dir_def ? "on" : "off"; } -enum { - OVL_XINO_OFF, - OVL_XINO_AUTO, - OVL_XINO_ON, -}; - static const char * const ovl_xino_str[] = { "off", "auto", @@ -751,13 +746,12 @@ static int ovl_mount_dir(const char *name, struct path *path) ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); - if (!err) - if (ovl_dentry_remote(path->dentry)) { - pr_err("filesystem on '%s' not supported as upperdir\n", - tmp); - path_put_init(path); - err = -EINVAL; - } + if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { + pr_err("filesystem on '%s' not supported as upperdir\n", + tmp); + path_put_init(path); + err = -EINVAL; + } kfree(tmp); } return err; @@ -778,7 +772,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, } static int ovl_lower_dir(const char *name, struct path *path, - struct ovl_fs *ofs, int *stack_depth, bool *remote) + struct ovl_fs *ofs, int *stack_depth) { int fh_type; int err; @@ -793,9 +787,6 @@ static int ovl_lower_dir(const char *name, struct path *path, *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); - if (ovl_dentry_remote(path->dentry)) - *remote = true; - /* * The inodes index feature and NFS export need to encode and decode * file handles, so they require that all layers support them. @@ -1074,11 +1065,73 @@ out: return err; } +/* + * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and + * negative values if error is encountered. + */ +static int ovl_check_rename_whiteout(struct dentry *workdir) +{ + struct inode *dir = d_inode(workdir); + struct dentry *temp; + struct dentry *dest; + struct dentry *whiteout; + struct name_snapshot name; + int err; + + inode_lock_nested(dir, I_MUTEX_PARENT); + + temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0)); + err = PTR_ERR(temp); + if (IS_ERR(temp)) + goto out_unlock; + + dest = ovl_lookup_temp(workdir); + err = PTR_ERR(dest); + if (IS_ERR(dest)) { + dput(temp); + goto out_unlock; + } + + /* Name is inline and stable - using snapshot as a copy helper */ + take_dentry_name_snapshot(&name, temp); + err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT); + if (err) { + if (err == -EINVAL) + err = 0; + goto cleanup_temp; + } + + whiteout = lookup_one_len(name.name.name, workdir, name.name.len); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto cleanup_temp; + + err = ovl_is_whiteout(whiteout); + + /* Best effort cleanup of whiteout and temp file */ + if (err) + ovl_cleanup(dir, whiteout); + dput(whiteout); + +cleanup_temp: + ovl_cleanup(dir, temp); + release_dentry_name_snapshot(&name); + dput(temp); + dput(dest); + +out_unlock: + inode_unlock(dir); + + return err; +} + static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, struct path *workpath) { struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; + bool rename_whiteout; + bool d_type; int fh_type; int err; @@ -1104,11 +1157,8 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, if (err < 0) goto out; - /* - * We allowed this configuration and don't want to break users over - * kernel upgrade. So warn instead of erroring out. - */ - if (!err) + d_type = err; + if (!d_type) pr_warn("upper fs needs to support d_type.\n"); /* Check if upper/work fs supports O_TMPFILE */ @@ -1119,6 +1169,16 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, else pr_warn("upper fs does not support tmpfile.\n"); + + /* Check if upper/work fs supports RENAME_WHITEOUT */ + err = ovl_check_rename_whiteout(ofs->workdir); + if (err < 0) + goto out; + + rename_whiteout = err; + if (!rename_whiteout) + pr_warn("upper fs does not support RENAME_WHITEOUT.\n"); + /* * Check if upper/work fs supports trusted.overlay.* xattr */ @@ -1133,6 +1193,18 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); } + /* + * We allowed sub-optimal upper fs configuration and don't want to break + * users over kernel upgrade, but we never allowed remote upper fs, so + * we can enforce strict requirements for remote upper fs. + */ + if (ovl_dentry_remote(ofs->workdir) && + (!d_type || !rename_whiteout || ofs->noxattr)) { + pr_err("upper fs missing required features.\n"); + err = -EINVAL; + goto out; + } + /* Check if upper/work fs supports file handles */ fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); if (ofs->config.index && !fh_type) { @@ -1401,11 +1473,12 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, /* * When all layers on same fs, overlay can use real inode numbers. - * With mount option "xino=on", mounter declares that there are enough - * free high bits in underlying fs to hold the unique fsid. + * With mount option "xino=<on|auto>", mounter declares that there are + * enough free high bits in underlying fs to hold the unique fsid. * If overlayfs does encounter underlying inodes using the high xino * bits reserved for fsid, it emits a warning and uses the original - * inode number. + * inode number or a non persistent inode number allocated from a + * dedicated range. */ if (ofs->numfs - !ofs->upper_mnt == 1) { if (ofs->config.xino == OVL_XINO_ON) @@ -1413,14 +1486,16 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, ofs->xino_mode = 0; } else if (ofs->config.xino == OVL_XINO_OFF) { ofs->xino_mode = -1; - } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { + } else if (ofs->xino_mode < 0) { /* * This is a roundup of number of bits needed for encoding - * fsid, where fsid 0 is reserved for upper fs even with - * lower only overlay. + * fsid, where fsid 0 is reserved for upper fs (even with + * lower only overlay) +1 extra bit is reserved for the non + * persistent inode number range that is used for resolving + * xino lower bits overflow. */ - BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); - ofs->xino_mode = ilog2(ofs->numfs - 1) + 1; + BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30); + ofs->xino_mode = ilog2(ofs->numfs - 1) + 2; } if (ofs->xino_mode > 0) { @@ -1440,7 +1515,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, char *lowertmp, *lower; struct path *stack = NULL; unsigned int stacklen, numlower = 0, i; - bool remote = false; struct ovl_entry *oe; err = -ENOMEM; @@ -1472,7 +1546,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { err = ovl_lower_dir(lower, &stack[numlower], ofs, - &sb->s_stack_depth, &remote); + &sb->s_stack_depth); if (err) goto out_err; @@ -1500,11 +1574,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, oe->lowerstack[i].layer = &ofs->layers[i+1]; } - if (remote) - sb->s_d_op = &ovl_reval_dentry_operations; - else - sb->s_d_op = &ovl_dentry_operations; - out: for (i = 0; i < numlower; i++) path_put(&stack[i]); @@ -1589,6 +1658,44 @@ static int ovl_check_overlapping_layers(struct super_block *sb, return 0; } +static struct dentry *ovl_get_root(struct super_block *sb, + struct dentry *upperdentry, + struct ovl_entry *oe) +{ + struct dentry *root; + struct ovl_path *lowerpath = &oe->lowerstack[0]; + unsigned long ino = d_inode(lowerpath->dentry)->i_ino; + int fsid = lowerpath->layer->fsid; + struct ovl_inode_params oip = { + .upperdentry = upperdentry, + .lowerpath = lowerpath, + }; + + root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); + if (!root) + return NULL; + + root->d_fsdata = oe; + + if (upperdentry) { + /* Root inode uses upper st_ino/i_ino */ + ino = d_inode(upperdentry)->i_ino; + fsid = 0; + ovl_dentry_set_upper_alias(root); + if (ovl_is_impuredir(upperdentry)) + ovl_set_flag(OVL_IMPURE, d_inode(root)); + } + + /* Root is always merge -> can have whiteouts */ + ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); + ovl_dentry_set_flag(OVL_E_CONNECTED, root); + ovl_set_upperdata(d_inode(root)); + ovl_inode_init(d_inode(root), &oip, ino, fsid); + ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE); + + return root; +} + static int ovl_fill_super(struct super_block *sb, void *data, int silent) { struct path upperpath = { }; @@ -1598,6 +1705,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) struct cred *cred; int err; + sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); if (!ofs) @@ -1624,6 +1733,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth = 0; sb->s_maxbytes = MAX_LFS_FILESIZE; + atomic_long_set(&ofs->last_ino, 1); /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ if (ofs->config.xino != OVL_XINO_OFF) { ofs->xino_mode = BITS_PER_LONG - 32; @@ -1710,25 +1820,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= SB_POSIXACL; err = -ENOMEM; - root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); + root_dentry = ovl_get_root(sb, upperpath.dentry, oe); if (!root_dentry) goto out_free_oe; - root_dentry->d_fsdata = oe; - mntput(upperpath.mnt); - if (upperpath.dentry) { - ovl_dentry_set_upper_alias(root_dentry); - if (ovl_is_impuredir(upperpath.dentry)) - ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); - } - - /* Root is always merge -> can have whiteouts */ - ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); - ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); - ovl_set_upperdata(d_inode(root_dentry)); - ovl_inode_init(d_inode(root_dentry), upperpath.dentry, - ovl_dentry_lower(root_dentry), NULL); sb->s_root = root_dentry; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 042f7eb4f7f4..36b60788ee47 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -93,8 +93,24 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower) bool ovl_dentry_remote(struct dentry *dentry) { return dentry->d_flags & - (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_REAL); + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); +} + +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask) +{ + struct ovl_entry *oe = OVL_E(dentry); + unsigned int i, flags = 0; + + if (upperdentry) + flags |= upperdentry->d_flags; + for (i = 0; i < oe->numlower; i++) + flags |= oe->lowerstack[i].dentry->d_flags; + + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~mask; + dentry->d_flags |= flags & mask; + spin_unlock(&dentry->d_lock); } bool ovl_dentry_weird(struct dentry *dentry) @@ -386,24 +402,6 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) oi->redirect = redirect; } -void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *lowerdata) -{ - struct inode *realinode = d_inode(upperdentry ?: lowerdentry); - - if (upperdentry) - OVL_I(inode)->__upperdentry = upperdentry; - if (lowerdentry) - OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); - if (lowerdata) - OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata)); - - ovl_copyattr(realinode, inode); - ovl_copyflags(realinode, inode); - if (!inode->i_ino) - inode->i_ino = realinode->i_ino; -} - void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) { struct inode *upperinode = d_inode(upperdentry); @@ -416,8 +414,6 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; if (inode_unhashed(inode)) { - if (!inode->i_ino) - inode->i_ino = upperinode->i_ino; inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } |