diff options
Diffstat (limited to 'fs')
44 files changed, 441 insertions, 258 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4c09d93d9569..b2f82cf6bf86 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -170,8 +170,8 @@ config BINFMT_MISC You can do other nice things, too. Read the file <file:Documentation/binfmt_misc.txt> to learn how to use this - feature, <file:Documentation/java.txt> for information about how - to include Java support. and <file:Documentation/mono.txt> for + feature, <file:Documentation/admin-guide/java.rst> for information about how + to include Java support. and <file:Documentation/admin-guide/mono.rst> for information about how to include Mono-based .NET support. To use binfmt_misc, you will need to mount it: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2472af2798c7..e6c1bd443806 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2204,7 +2204,9 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL); + if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz)) + goto end_coredump; + vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz)); if (!vma_filesz) goto end_coredump; @@ -2311,7 +2313,7 @@ end_coredump: cleanup: free_note_info(&info); kfree(shdr4extnum); - kfree(vma_filesz); + vfree(vma_filesz); kfree(phdr4note); kfree(elf); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 74ed5aae6cea..180f910339f4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -202,27 +202,31 @@ static struct ratelimit_state printk_limits[] = { void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) { struct super_block *sb = fs_info->sb; - char lvl[4]; + char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1]; struct va_format vaf; va_list args; - const char *type = logtypes[4]; + const char *type = NULL; int kern_level; struct ratelimit_state *ratelimit; va_start(args, fmt); - kern_level = printk_get_level(fmt); - if (kern_level) { + while ((kern_level = printk_get_level(fmt)) != 0) { size_t size = printk_skip_level(fmt) - fmt; - memcpy(lvl, fmt, size); - lvl[size] = '\0'; + + if (kern_level >= '0' && kern_level <= '7') { + memcpy(lvl, fmt, size); + lvl[size] = '\0'; + type = logtypes[kern_level - '0']; + ratelimit = &printk_limits[kern_level - '0']; + } fmt += size; - type = logtypes[kern_level - '0']; - ratelimit = &printk_limits[kern_level - '0']; - } else { + } + + if (!type) { *lvl = '\0'; - /* Default to debug output */ - ratelimit = &printk_limits[7]; + type = logtypes[4]; + ratelimit = &printk_limits[4]; } vaf.fmt = fmt; diff --git a/fs/buffer.c b/fs/buffer.c index b205a629001d..1613656028d6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3403,7 +3403,7 @@ void free_buffer_head(struct buffer_head *bh) } EXPORT_SYMBOL(free_buffer_head); -static void buffer_exit_cpu(int cpu) +static int buffer_exit_cpu_dead(unsigned int cpu) { int i; struct bh_lru *b = &per_cpu(bh_lrus, cpu); @@ -3414,14 +3414,7 @@ static void buffer_exit_cpu(int cpu) } this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); per_cpu(bh_accounting, cpu).nr = 0; -} - -static int buffer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) - buffer_exit_cpu((unsigned long)hcpu); - return NOTIFY_OK; + return 0; } /** @@ -3471,6 +3464,7 @@ EXPORT_SYMBOL(bh_submit_read); void __init buffer_init(void) { unsigned long nrpages; + int ret; bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, @@ -3483,5 +3477,7 @@ void __init buffer_init(void) */ nrpages = (nr_free_buffer_pages() * 10) / 100; max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); - hotcpu_notifier(buffer_cpu_notify, 0); + ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead", + NULL, buffer_exit_cpu_dead); + WARN_ON(ret < 0); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 78180d151730..a594c7879cc2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1261,26 +1261,30 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; op = ceph_snap(dir) == CEPH_SNAPDIR ? - CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; + CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (!IS_ERR(req)) { req->r_dentry = dget(dentry); - req->r_num_caps = 2; + req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2; mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; req->r_args.getattr.mask = mask; - req->r_locked_dir = dir; err = ceph_mdsc_do_request(mdsc, NULL, req); - if (err == 0 || err == -ENOENT) { - if (dentry == req->r_dentry) { - valid = !d_unhashed(dentry); - } else { - d_invalidate(req->r_dentry); - err = -EAGAIN; - } + switch (err) { + case 0: + if (d_really_is_positive(dentry) && + d_inode(dentry) == req->r_target_inode) + valid = 1; + break; + case -ENOENT: + if (d_really_is_negative(dentry)) + valid = 1; + /* Fallthrough */ + default: + break; } ceph_mdsc_put_request(req); dout("d_revalidate %p lookup result=%d\n", @@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot) radix_tree_deref_slot_protected(slot, &mapping->tree_lock); entry |= RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(slot, (void *)entry); + radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); return (void *)entry; } @@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot) radix_tree_deref_slot_protected(slot, &mapping->tree_lock); entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(slot, (void *)entry); + radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); return (void *)entry; } @@ -643,12 +643,14 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, } mapping->nrexceptional++; } else { + struct radix_tree_node *node; void **slot; void *ret; - ret = __radix_tree_lookup(page_tree, index, NULL, &slot); + ret = __radix_tree_lookup(page_tree, index, &node, &slot); WARN_ON_ONCE(ret != entry); - radix_tree_replace_slot(slot, new_entry); + __radix_tree_replace(page_tree, node, slot, + new_entry, NULL, NULL); } if (vmf->flags & FAULT_FLAG_WRITE) radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 1e6e227134d7..0643ae44f342 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -16,11 +16,7 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; -static struct genl_family family = { - .id = GENL_ID_GENERATE, - .name = DLM_GENL_NAME, - .version = DLM_GENL_VERSION, -}; +static struct genl_family family; static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size) { @@ -76,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = { }, }; +static struct genl_family family __ro_after_init = { + .name = DLM_GENL_NAME, + .version = DLM_GENL_VERSION, + .ops = dlm_nl_ops, + .n_ops = ARRAY_SIZE(dlm_nl_ops), + .module = THIS_MODULE, +}; + int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, dlm_nl_ops); + return genl_register_family(&family); } void dlm_netlink_exit(void) diff --git a/fs/exec.c b/fs/exec.c index 4e497b9ee71e..923c57d96899 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1169,8 +1169,10 @@ no_thread_group: /* we have changed execution domain */ tsk->exit_signal = SIGCHLD; +#ifdef CONFIG_POSIX_TIMERS exit_itimers(sig); flush_itimer_signals(); +#endif if (atomic_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05713a5da083..ef600591d96f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1769,15 +1769,13 @@ static long wb_writeback(struct bdi_writeback *wb, * become available for writeback. Otherwise * we'll just busyloop. */ - if (!list_empty(&wb->b_more_io)) { - trace_writeback_wait(wb, work); - inode = wb_inode(wb->b_more_io.prev); - spin_lock(&inode->i_lock); - spin_unlock(&wb->list_lock); - /* This function drops i_lock... */ - inode_sleep_on_writeback(inode); - spin_lock(&wb->list_lock); - } + trace_writeback_wait(wb, work); + inode = wb_inode(wb->b_more_io.prev); + spin_lock(&inode->i_lock); + spin_unlock(&wb->list_lock); + /* This function drops i_lock... */ + inode_sleep_on_writeback(inode); + spin_lock(&wb->list_lock); } spin_unlock(&wb->list_lock); blk_finish_plug(&plug); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3ebe512d64c..096f79997f75 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1739,8 +1739,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) * This should be done on write(), truncate() and chown(). */ if (!fc->handle_killpriv) { - int kill; - /* * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. @@ -1750,12 +1748,11 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return ret; attr->ia_mode = inode->i_mode; - kill = should_remove_suid(entry); - if (kill & ATTR_KILL_SUID) { + if (inode->i_mode & S_ISUID) { attr->ia_valid |= ATTR_MODE; attr->ia_mode &= ~S_ISUID; } - if (kill & ATTR_KILL_SGID) { + if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { attr->ia_valid |= ATTR_MODE; attr->ia_mode &= ~S_ISGID; } diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 5426189406c1..fb8cac88251a 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -15,6 +15,6 @@ struct lockd_net { struct list_head nsm_handles; }; -extern int lockd_net_id; +extern unsigned int lockd_net_id; #endif diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index fc4084ef4736..1c13dd80744f 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,7 +57,7 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; -int lockd_net_id; +unsigned int lockd_net_id; /* * These can be set at insmod time (useful for NFS as root filesystem), diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf4ec5ecc97e..ce42dd00e4ee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2015,7 +2015,7 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } -int nfs_net_id; +unsigned int nfs_net_id; EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index fbce0d885d4c..5fbd2bde91ba 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -35,6 +35,6 @@ struct nfs_net { #endif }; -extern int nfs_net_id; +extern unsigned int nfs_net_id; #endif diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index fd8c9a5bcac4..420d3a0ab258 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -9,7 +9,7 @@ #include <net/netns/generic.h> #include <linux/fs.h> -static int grace_net_id; +static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); /** diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index ee36efd5aece..3714231a9d0f 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -124,5 +124,5 @@ struct nfsd_net { /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) -extern int nfsd_net_id; +extern unsigned int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 36b2af931e06..2857e46d5cc5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1201,7 +1201,7 @@ static int create_proc_exports_entry(void) } #endif -int nfsd_net_id; +unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eab..8c9fb29c6673 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -118,7 +118,7 @@ again: return ret; } -static int open_related_ns(struct ns_common *ns, +int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { struct path path = {}; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c5c5b9748ea3..9a88984f9f6f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1950,8 +1950,7 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, } int ocfs2_write_end_nolock(struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + loff_t pos, unsigned len, unsigned copied, void *fsdata) { int i, ret; unsigned from, to, start = pos & (PAGE_SIZE - 1); @@ -2064,7 +2063,7 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, int ret; struct inode *inode = mapping->host; - ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); + ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata); up_write(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_inode_unlock(inode, 1); @@ -2241,7 +2240,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, dwc->dw_zero_count++; } - ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc); + ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc); BUG_ON(ret != len); ret = 0; unlock: diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index b1c9f28a57b1..8614ff069d99 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -44,8 +44,7 @@ int walk_page_buffers( handle_t *handle, struct buffer_head *bh)); int ocfs2_write_end_nolock(struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); + loff_t pos, unsigned len, unsigned copied, void *fsdata); typedef enum { OCFS2_WRITE_BUFFER = 0, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 636abcbd4650..9158c9825094 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -741,7 +741,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg, hb_block = (struct o2hb_disk_heartbeat_block *)slot->ds_raw_block; memset(hb_block, 0, reg->hr_block_bytes); /* TODO: time stuff */ - cputime = CURRENT_TIME.tv_sec; + cputime = ktime_get_real_seconds(); if (!cputime) cputime = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3f828a187049..a464c8088170 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1609,8 +1609,6 @@ way_up_top: __dlm_insert_mle(dlm, mle); response = DLM_MASTER_RESP_NO; } else { - // mlog(0, "mle was found\n"); - set_maybe = 1; spin_lock(&tmpmle->spinlock); if (tmpmle->master == dlm->node_num) { mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n"); @@ -1625,8 +1623,7 @@ way_up_top: response = DLM_MASTER_RESP_NO; } else response = DLM_MASTER_RESP_MAYBE; - if (set_maybe) - set_bit(request->node_idx, tmpmle->maybe_map); + set_bit(request->node_idx, tmpmle->maybe_map); spin_unlock(&tmpmle->spinlock); } spin_unlock(&dlm->master_lock); @@ -1644,12 +1641,6 @@ send_response: * dlm_assert_master_worker() isn't called, we drop it here. */ if (dispatch_assert) { - if (response != DLM_MASTER_RESP_YES) - mlog(ML_ERROR, "invalid response %d\n", response); - if (!res) { - mlog(ML_ERROR, "bad lockres while trying to assert!\n"); - BUG(); - } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); spin_lock(&res->spinlock); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index dd5cb8bcefd1..74407c6dd592 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2966,8 +2966,6 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&dlm->spinlock); dlm_kick_recovery_thread(dlm); break; - default: - BUG(); } mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index c56a7679df93..382401d3e88f 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -703,7 +703,7 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail_commit; } - di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); + di->i_dtime = cpu_to_le64(ktime_get_real_seconds()); di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a244f14c6b87..d5e5fa7f0743 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1947,7 +1947,7 @@ static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) */ seqno++; os->os_count++; - os->os_scantime = CURRENT_TIME; + os->os_scantime = ktime_get_seconds(); unlock: ocfs2_orphan_scan_unlock(osb, seqno); out: @@ -2004,7 +2004,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; - os->os_scantime = CURRENT_TIME; + os->os_scantime = ktime_get_seconds(); if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); else { diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 71545ad4628c..429088786e93 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -120,8 +120,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, ret = VM_FAULT_NOPAGE; goto out; } - ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, - fsdata); + ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata); BUG_ON(ret != len); ret = VM_FAULT_LOCKED; out: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8d887c75765c..3b0a10d9b36f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -516,6 +516,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, struct ocfs2_extent_list *fel; u16 feat; struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct timespec64 ts; *new_fe_bh = NULL; @@ -564,10 +565,11 @@ static int __ocfs2_mknod_locked(struct inode *dir, fe->i_last_eb_blk = 0; strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); + ktime_get_real_ts64(&ts); fe->i_atime = fe->i_ctime = fe->i_mtime = - cpu_to_le64(CURRENT_TIME.tv_sec); + cpu_to_le64(ts.tv_sec); fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = - cpu_to_le32(CURRENT_TIME.tv_nsec); + cpu_to_le32(ts.tv_nsec); fe->i_dtime = 0; /* diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e63af7ddfe68..7e5958b0be6b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -224,7 +224,7 @@ struct ocfs2_orphan_scan { struct ocfs2_super *os_osb; struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */ struct delayed_work os_orphan_scan_work; - struct timespec os_scantime; /* time this node ran the scan */ + time64_t os_scantime; /* time this node ran the scan */ u32 os_count; /* tracks node specific scans */ u32 os_seqno; /* tracks cluster wide scans */ atomic_t os_state; /* ACTIVE or INACTIVE */ diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19238512a324..738b4ea8e990 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -478,7 +478,6 @@ again: if (ret) { mlog_errno(ret); ocfs2_unlock_refcount_tree(osb, tree, rw); - ocfs2_refcount_tree_put(tree); goto out; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f56fe39fab04..c894d945b084 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -337,7 +337,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) out += snprintf(buf + out, len - out, "Disabled\n"); else out += snprintf(buf + out, len - out, "%lu seconds ago\n", - (get_seconds() - os->os_scantime.tv_sec)); + (unsigned long)(ktime_get_seconds() - os->os_scantime)); out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); diff --git a/fs/proc/array.c b/fs/proc/array.c index 81818adb8e9e..51a4213afa2e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -245,7 +245,7 @@ void render_sigset_t(struct seq_file *m, const char *header, if (sigismember(set, i+2)) x |= 2; if (sigismember(set, i+3)) x |= 4; if (sigismember(set, i+4)) x |= 8; - seq_printf(m, "%x", x); + seq_putc(m, hex_asc[x]); } while (i >= 4); seq_putc(m, '\n'); @@ -342,10 +342,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) static inline void task_seccomp(struct seq_file *m, struct task_struct *p) { + seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p)); #ifdef CONFIG_SECCOMP - seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode); - seq_putc(m, '\n'); + seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); #endif + seq_putc(m, '\n'); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index ca651ac00660..9b99df4893a4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -104,9 +104,12 @@ * in /proc for a task before it execs a suid executable. */ +static u8 nlink_tid; +static u8 nlink_tgid; + struct pid_entry { const char *name; - int len; + unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; @@ -139,13 +142,13 @@ struct pid_entry { * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ -static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, +static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; - count = 0; + count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; @@ -1967,7 +1970,7 @@ out: struct map_files_info { fmode_t mode; - unsigned long len; + unsigned int len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -2412,14 +2415,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir, * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ - last = &ents[nents - 1]; - for (p = ents; p <= last; p++) { + last = &ents[nents]; + for (p = ents; p < last; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) break; } - if (p > last) + if (p >= last) goto out; error = proc_pident_instantiate(dir, dentry, task, p); @@ -2444,7 +2447,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx, if (ctx->pos >= nents + 2) goto out; - for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { + for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; @@ -3068,8 +3071,7 @@ static int proc_pid_instantiate(struct inode *dir, inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, - ARRAY_SIZE(tgid_base_stuff))); + set_nlink(inode, nlink_tgid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3361,8 +3363,7 @@ static int proc_task_instantiate(struct inode *dir, inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, - ARRAY_SIZE(tid_base_stuff))); + set_nlink(inode, nlink_tid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3552,3 +3553,9 @@ static const struct file_operations proc_task_operations = { .iterate_shared = proc_task_readdir, .llseek = generic_file_llseek, }; + +void __init set_proc_pid_nlink(void) +{ + nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); + nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index e69ebe648a34..783bc19644d1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -138,6 +138,16 @@ static void unuse_pde(struct proc_dir_entry *pde) /* pde is locked */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) { + /* + * close() (proc_reg_release()) can't delete an entry and proceed: + * ->release hook needs to be available at the right moment. + * + * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: + * "struct file" needs to be available at the right moment. + * + * Therefore, first process to enter this function does ->release() and + * signals its completion to the other process which does nothing. + */ if (pdeo->closing) { /* somebody else is doing that, just wait */ DECLARE_COMPLETION_ONSTACK(c); @@ -147,12 +157,13 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); } else { struct file *file; - pdeo->closing = 1; + pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); file = pdeo->file; pde->proc_fops->release(file_inode(file), file); spin_lock(&pde->pde_unload_lock); - list_del_init(&pdeo->lh); + /* After ->release. */ + list_del(&pdeo->lh); if (pdeo->c) complete(pdeo->c); kfree(pdeo); @@ -167,6 +178,8 @@ void proc_entry_rundown(struct proc_dir_entry *de) if (atomic_add_return(BIAS, &de->in_use) != BIAS) wait_for_completion(&c); + /* ->pde_openers list can't grow from now on. */ + spin_lock(&de->pde_unload_lock); while (!list_empty(&de->pde_openers)) { struct pde_opener *pdeo; @@ -312,16 +325,17 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct pde_opener *pdeo; /* - * What for, you ask? Well, we can have open, rmmod, remove_proc_entry - * sequence. ->release won't be called because ->proc_fops will be - * cleared. Depending on complexity of ->release, consequences vary. + * Ensure that + * 1) PDE's ->release hook will be called no matter what + * either normally by close()/->release, or forcefully by + * rmmod/remove_proc_entry. + * + * 2) rmmod isn't blocked by opening file in /proc and sitting on + * the descriptor (including "rmmod foo </proc/foo" scenario). * - * We can't wait for mercy when close will be done for real, it's - * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release - * by hand in remove_proc_entry(). For this, save opener's credentials - * for later. + * Save every "struct file" with custom ->release hook. */ - pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL); + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); if (!pdeo) return -ENOMEM; @@ -338,7 +352,8 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (rv == 0 && release) { /* To know what to release. */ pdeo->file = file; - /* Strictly for "too late" ->release in proc_reg_release(). */ + pdeo->closing = false; + pdeo->c = NULL; spin_lock(&pde->pde_unload_lock); list_add(&pdeo->lh, &pde->pde_openers); spin_unlock(&pde->pde_unload_lock); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5378441ec1b7..bbba5d22aada 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -203,7 +203,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name); struct pde_opener { struct file *file; struct list_head lh; - int closing; + bool closing; struct completion *c; }; extern const struct inode_operations proc_link_inode_operations; @@ -211,6 +211,7 @@ extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern void proc_init_inodecache(void); +void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern int proc_fill_super(struct super_block *, void *data, int flags); extern void proc_entry_rundown(struct proc_dir_entry *); diff --git a/fs/proc/root.c b/fs/proc/root.c index 8d3e484055a6..4bd0373576b5 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -122,6 +122,7 @@ void __init proc_root_init(void) int err; proc_init_inodecache(); + set_proc_pid_nlink(); err = register_filesystem(&proc_fs_type); if (err) return; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 35b92d81692f..958f32545064 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1588,6 +1588,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); + cond_resched(); return 0; } #ifdef CONFIG_HUGETLB_PAGE diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index be40813eff52..b42e5bd6d8ff 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -86,4 +86,4 @@ config PSTORE_RAM Note that for historical reasons, the module will be named "ramoops.ko". - For more information, see Documentation/ramoops.txt. + For more information, see Documentation/admin-guide/ramoops.rst. diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index d4887705bb61..899d0ba0bd6c 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -27,6 +27,9 @@ #include <asm/barrier.h> #include "internal.h" +/* This doesn't need to be atomic: speed is chosen over correctness here. */ +static u64 pstore_ftrace_stamp; + static void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, @@ -42,6 +45,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, rec.ip = ip; rec.parent_ip = parent_ip; + pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++); pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, 0, sizeof(rec), psinfo); @@ -71,10 +75,13 @@ static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf, if (!on ^ pstore_ftrace_enabled) goto out; - if (on) + if (on) { + ftrace_ops_set_global_filter(&pstore_ftrace_ops); ret = register_ftrace_function(&pstore_ftrace_ops); - else + } else { ret = unregister_ftrace_function(&pstore_ftrace_ops); + } + if (ret) { pr_err("%s: unable to %sregister ftrace ops: %zd\n", __func__, on ? "" : "un", ret); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 1781dc50762e..57c0646479f5 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -107,9 +107,11 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v) struct pstore_ftrace_seq_data *data = v; struct pstore_ftrace_record *rec = (void *)(ps->data + data->off); - seq_printf(s, "%d %08lx %08lx %pf <- %pF\n", - pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip, - (void *)rec->ip, (void *)rec->parent_ip); + seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n", + pstore_ftrace_decode_cpu(rec), + pstore_ftrace_read_timestamp(rec), + rec->ip, rec->parent_ip, (void *)rec->ip, + (void *)rec->parent_ip); return 0; } @@ -197,11 +199,14 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) if (err) return err; - if (p->psi->erase) + if (p->psi->erase) { + mutex_lock(&p->psi->read_mutex); p->psi->erase(p->type, p->id, p->count, d_inode(dentry)->i_ctime, p->psi); - else + mutex_unlock(&p->psi->read_mutex); + } else { return -EPERM; + } return simple_unlink(dir, dentry); } diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index e38a22b31282..da416e6591c9 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -5,40 +5,6 @@ #include <linux/time.h> #include <linux/pstore.h> -#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB) -#define PSTORE_CPU_IN_IP 0x1 -#elif NR_CPUS <= 4 && defined(CONFIG_ARM) -#define PSTORE_CPU_IN_IP 0x3 -#endif - -struct pstore_ftrace_record { - unsigned long ip; - unsigned long parent_ip; -#ifndef PSTORE_CPU_IN_IP - unsigned int cpu; -#endif -}; - -static inline void -pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu) -{ -#ifndef PSTORE_CPU_IN_IP - rec->cpu = cpu; -#else - rec->ip |= cpu; -#endif -} - -static inline unsigned int -pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec) -{ -#ifndef PSTORE_CPU_IN_IP - return rec->cpu; -#else - return rec->ip & PSTORE_CPU_IN_IP; -#endif -} - #ifdef CONFIG_PSTORE_FTRACE extern void pstore_register_ftrace(void); extern void pstore_unregister_ftrace(void); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 14984d902a99..729677e18e36 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -493,6 +493,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, if (!is_locked) { pr_err("pstore dump routine blocked in %s path, may corrupt error record\n" , in_nmi() ? "NMI" : why); + return; } } else { spin_lock_irqsave(&psinfo->buf_lock, flags); @@ -584,8 +585,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } else { spin_lock_irqsave(&psinfo->buf_lock, flags); } - memcpy(psinfo->buf, s, c); - psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); + psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0, + s, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6ad831b9d1b8..27c059e1760a 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -85,10 +85,10 @@ MODULE_PARM_DESC(ramoops_ecc, "bytes ECC)"); struct ramoops_context { - struct persistent_ram_zone **przs; - struct persistent_ram_zone *cprz; - struct persistent_ram_zone *fprz; - struct persistent_ram_zone *mprz; + struct persistent_ram_zone **dprzs; /* Oops dump zones */ + struct persistent_ram_zone *cprz; /* Console zone */ + struct persistent_ram_zone **fprzs; /* Ftrace zones */ + struct persistent_ram_zone *mprz; /* PMSG zone */ phys_addr_t phys_addr; unsigned long size; unsigned int memtype; @@ -97,12 +97,14 @@ struct ramoops_context { size_t ftrace_size; size_t pmsg_size; int dump_oops; + u32 flags; struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; unsigned int dump_write_cnt; /* _read_cnt need clear on ramoops_pstore_open */ unsigned int dump_read_cnt; unsigned int console_read_cnt; + unsigned int max_ftrace_cnt; unsigned int ftrace_read_cnt; unsigned int pmsg_read_cnt; struct pstore_info pstore; @@ -180,16 +182,69 @@ static bool prz_ok(struct persistent_ram_zone *prz) persistent_ram_ecc_string(prz, NULL, 0)); } +static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, + struct persistent_ram_zone *src) +{ + size_t dest_size, src_size, total, dest_off, src_off; + size_t dest_idx = 0, src_idx = 0, merged_idx = 0; + void *merged_buf; + struct pstore_ftrace_record *drec, *srec, *mrec; + size_t record_size = sizeof(struct pstore_ftrace_record); + + dest_off = dest->old_log_size % record_size; + dest_size = dest->old_log_size - dest_off; + + src_off = src->old_log_size % record_size; + src_size = src->old_log_size - src_off; + + total = dest_size + src_size; + merged_buf = kmalloc(total, GFP_KERNEL); + if (!merged_buf) + return -ENOMEM; + + drec = (struct pstore_ftrace_record *)(dest->old_log + dest_off); + srec = (struct pstore_ftrace_record *)(src->old_log + src_off); + mrec = (struct pstore_ftrace_record *)(merged_buf); + + while (dest_size > 0 && src_size > 0) { + if (pstore_ftrace_read_timestamp(&drec[dest_idx]) < + pstore_ftrace_read_timestamp(&srec[src_idx])) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } else { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + } + + while (dest_size > 0) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } + + while (src_size > 0) { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + + kfree(dest->old_log); + dest->old_log = merged_buf; + dest->old_log_size = total; + + return 0; +} + static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, bool *compressed, ssize_t *ecc_notice_size, struct pstore_info *psi) { - ssize_t size; + ssize_t size = 0; struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz = NULL; int header_length = 0; + bool free_prz = false; /* Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but * PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have @@ -201,7 +256,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, /* Find the next valid persistent_ram_zone for DMESG */ while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) { - prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, + prz = ramoops_get_next_prz(cxt->dprzs, &cxt->dump_read_cnt, cxt->max_dump_cnt, id, type, PSTORE_TYPE_DMESG, 1); if (!prz_ok(prz)) @@ -219,14 +274,56 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, 1, id, type, PSTORE_TYPE_CONSOLE, 0); - if (!prz_ok(prz)) - prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, - 1, id, type, PSTORE_TYPE_FTRACE, 0); + if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt, 1, id, type, PSTORE_TYPE_PMSG, 0); - if (!prz_ok(prz)) - return 0; + + /* ftrace is last since it may want to dynamically allocate memory. */ + if (!prz_ok(prz)) { + if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) { + prz = ramoops_get_next_prz(cxt->fprzs, + &cxt->ftrace_read_cnt, 1, id, type, + PSTORE_TYPE_FTRACE, 0); + } else { + /* + * Build a new dummy record which combines all the + * per-cpu records including metadata and ecc info. + */ + struct persistent_ram_zone *tmp_prz, *prz_next; + + tmp_prz = kzalloc(sizeof(struct persistent_ram_zone), + GFP_KERNEL); + if (!tmp_prz) + return -ENOMEM; + free_prz = true; + + while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { + prz_next = ramoops_get_next_prz(cxt->fprzs, + &cxt->ftrace_read_cnt, + cxt->max_ftrace_cnt, id, + type, PSTORE_TYPE_FTRACE, 0); + + if (!prz_ok(prz_next)) + continue; + + tmp_prz->ecc_info = prz_next->ecc_info; + tmp_prz->corrected_bytes += + prz_next->corrected_bytes; + tmp_prz->bad_blocks += prz_next->bad_blocks; + size = ftrace_log_combine(tmp_prz, prz_next); + if (size) + goto out; + } + *id = 0; + prz = tmp_prz; + } + } + + if (!prz_ok(prz)) { + size = 0; + goto out; + } size = persistent_ram_old_size(prz) - header_length; @@ -234,12 +331,21 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, *ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); *buf = kmalloc(size + *ecc_notice_size + 1, GFP_KERNEL); - if (*buf == NULL) - return -ENOMEM; + if (*buf == NULL) { + size = -ENOMEM; + goto out; + } memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size); + persistent_ram_ecc_string(prz, *buf + size, *ecc_notice_size + 1); +out: + if (free_prz) { + kfree(prz->old_log); + kfree(prz); + } + return size; } @@ -283,15 +389,23 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, persistent_ram_write(cxt->cprz, buf, size); return 0; } else if (type == PSTORE_TYPE_FTRACE) { - if (!cxt->fprz) + int zonenum; + + if (!cxt->fprzs) return -ENOMEM; - persistent_ram_write(cxt->fprz, buf, size); + /* + * Choose zone by if we're using per-cpu buffers. + */ + if (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + zonenum = smp_processor_id(); + else + zonenum = 0; + + persistent_ram_write(cxt->fprzs[zonenum], buf, size); return 0; } else if (type == PSTORE_TYPE_PMSG) { - if (!cxt->mprz) - return -ENOMEM; - persistent_ram_write(cxt->mprz, buf, size); - return 0; + pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__); + return -EINVAL; } if (type != PSTORE_TYPE_DMESG) @@ -316,10 +430,10 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, if (part != 1) return -ENOSPC; - if (!cxt->przs) + if (!cxt->dprzs) return -ENOSPC; - prz = cxt->przs[cxt->dump_write_cnt]; + prz = cxt->dprzs[cxt->dump_write_cnt]; hlen = ramoops_write_kmsg_hdr(prz, compressed); if (size + hlen > prz->buffer_size) @@ -359,13 +473,15 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, case PSTORE_TYPE_DMESG: if (id >= cxt->max_dump_cnt) return -EINVAL; - prz = cxt->przs[id]; + prz = cxt->dprzs[id]; break; case PSTORE_TYPE_CONSOLE: prz = cxt->cprz; break; case PSTORE_TYPE_FTRACE: - prz = cxt->fprz; + if (id >= cxt->max_ftrace_cnt) + return -EINVAL; + prz = cxt->fprzs[id]; break; case PSTORE_TYPE_PMSG: prz = cxt->mprz; @@ -396,68 +512,113 @@ static void ramoops_free_przs(struct ramoops_context *cxt) { int i; - if (!cxt->przs) - return; + /* Free dump PRZs */ + if (cxt->dprzs) { + for (i = 0; i < cxt->max_dump_cnt; i++) + persistent_ram_free(cxt->dprzs[i]); - for (i = 0; i < cxt->max_dump_cnt; i++) - persistent_ram_free(cxt->przs[i]); + kfree(cxt->dprzs); + cxt->max_dump_cnt = 0; + } - kfree(cxt->przs); - cxt->max_dump_cnt = 0; + /* Free ftrace PRZs */ + if (cxt->fprzs) { + for (i = 0; i < cxt->max_ftrace_cnt; i++) + persistent_ram_free(cxt->fprzs[i]); + kfree(cxt->fprzs); + cxt->max_ftrace_cnt = 0; + } } -static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, - phys_addr_t *paddr, size_t dump_mem_sz) +static int ramoops_init_przs(const char *name, + struct device *dev, struct ramoops_context *cxt, + struct persistent_ram_zone ***przs, + phys_addr_t *paddr, size_t mem_sz, + ssize_t record_size, + unsigned int *cnt, u32 sig, u32 flags) { int err = -ENOMEM; int i; + size_t zone_sz; + struct persistent_ram_zone **prz_ar; - if (!cxt->record_size) + /* Allocate nothing for 0 mem_sz or 0 record_size. */ + if (mem_sz == 0 || record_size == 0) { + *cnt = 0; return 0; + } - if (*paddr + dump_mem_sz - cxt->phys_addr > cxt->size) { - dev_err(dev, "no room for dumps\n"); - return -ENOMEM; + /* + * If we have a negative record size, calculate it based on + * mem_sz / *cnt. If we have a positive record size, calculate + * cnt from mem_sz / record_size. + */ + if (record_size < 0) { + if (*cnt == 0) + return 0; + record_size = mem_sz / *cnt; + if (record_size == 0) { + dev_err(dev, "%s record size == 0 (%zu / %u)\n", + name, mem_sz, *cnt); + goto fail; + } + } else { + *cnt = mem_sz / record_size; + if (*cnt == 0) { + dev_err(dev, "%s record count == 0 (%zu / %zu)\n", + name, mem_sz, record_size); + goto fail; + } } - cxt->max_dump_cnt = dump_mem_sz / cxt->record_size; - if (!cxt->max_dump_cnt) - return -ENOMEM; + if (*paddr + mem_sz - cxt->phys_addr > cxt->size) { + dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", + name, + mem_sz, (unsigned long long)*paddr, + cxt->size, (unsigned long long)cxt->phys_addr); + goto fail; + } - cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt, - GFP_KERNEL); - if (!cxt->przs) { - dev_err(dev, "failed to initialize a prz array for dumps\n"); - goto fail_mem; + zone_sz = mem_sz / *cnt; + if (!zone_sz) { + dev_err(dev, "%s zone size == 0\n", name); + goto fail; } - for (i = 0; i < cxt->max_dump_cnt; i++) { - cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0, + prz_ar = kcalloc(*cnt, sizeof(**przs), GFP_KERNEL); + if (!prz_ar) + goto fail; + + for (i = 0; i < *cnt; i++) { + prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig, &cxt->ecc_info, - cxt->memtype); - if (IS_ERR(cxt->przs[i])) { - err = PTR_ERR(cxt->przs[i]); - dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", - cxt->record_size, (unsigned long long)*paddr, err); + cxt->memtype, flags); + if (IS_ERR(prz_ar[i])) { + err = PTR_ERR(prz_ar[i]); + dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", + name, record_size, + (unsigned long long)*paddr, err); while (i > 0) { i--; - persistent_ram_free(cxt->przs[i]); + persistent_ram_free(prz_ar[i]); } - goto fail_prz; + kfree(prz_ar); + goto fail; } - *paddr += cxt->record_size; + *paddr += zone_sz; } + *przs = prz_ar; return 0; -fail_prz: - kfree(cxt->przs); -fail_mem: - cxt->max_dump_cnt = 0; + +fail: + *cnt = 0; return err; } -static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, +static int ramoops_init_prz(const char *name, + struct device *dev, struct ramoops_context *cxt, struct persistent_ram_zone **prz, phys_addr_t *paddr, size_t sz, u32 sig) { @@ -465,18 +626,19 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return 0; if (*paddr + sz - cxt->phys_addr > cxt->size) { - dev_err(dev, "no room for mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", - sz, (unsigned long long)*paddr, + dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n", + name, sz, (unsigned long long)*paddr, cxt->size, (unsigned long long)cxt->phys_addr); return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, + cxt->memtype, 0); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); - dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", - sz, (unsigned long long)*paddr, err); + dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n", + name, sz, (unsigned long long)*paddr, err); return err; } @@ -543,6 +705,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, parse_size("ftrace-size", pdata->ftrace_size); parse_size("pmsg-size", pdata->pmsg_size); parse_size("ecc-size", pdata->ecc_info.ecc_size); + parse_size("flags", pdata->flags); #undef parse_size @@ -561,6 +724,7 @@ static int ramoops_probe(struct platform_device *pdev) if (dev_of_node(dev) && !pdata) { pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) { + pr_err("cannot allocate platform data buffer\n"); err = -ENOMEM; goto fail_out; } @@ -570,11 +734,20 @@ static int ramoops_probe(struct platform_device *pdev) goto fail_out; } - /* Only a single ramoops area allowed at a time, so fail extra + /* + * Only a single ramoops area allowed at a time, so fail extra * probes. */ - if (cxt->max_dump_cnt) + if (cxt->max_dump_cnt) { + pr_err("already initialized\n"); goto fail_out; + } + + /* Make sure we didn't get bogus platform data pointer. */ + if (!pdata) { + pr_err("NULL platform data\n"); + goto fail_out; + } if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && !pdata->ftrace_size && !pdata->pmsg_size)) { @@ -600,27 +773,37 @@ static int ramoops_probe(struct platform_device *pdev) cxt->ftrace_size = pdata->ftrace_size; cxt->pmsg_size = pdata->pmsg_size; cxt->dump_oops = pdata->dump_oops; + cxt->flags = pdata->flags; cxt->ecc_info = pdata->ecc_info; paddr = cxt->phys_addr; dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size - cxt->pmsg_size; - err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); + err = ramoops_init_przs("dump", dev, cxt, &cxt->dprzs, &paddr, + dump_mem_sz, cxt->record_size, + &cxt->max_dump_cnt, 0, 0); if (err) goto fail_out; - err = ramoops_init_prz(dev, cxt, &cxt->cprz, &paddr, + err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr, cxt->console_size, 0); if (err) goto fail_init_cprz; - err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size, - LINUX_VERSION_CODE); + cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + ? nr_cpu_ids + : 1; + err = ramoops_init_przs("ftrace", dev, cxt, &cxt->fprzs, &paddr, + cxt->ftrace_size, -1, + &cxt->max_ftrace_cnt, LINUX_VERSION_CODE, + (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU) + ? PRZ_FLAG_NO_LOCK : 0); if (err) goto fail_init_fprz; - err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0); + err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr, + cxt->pmsg_size, 0); if (err) goto fail_init_mprz; @@ -680,7 +863,6 @@ fail_clear: cxt->pstore.bufsize = 0; persistent_ram_free(cxt->mprz); fail_init_mprz: - persistent_ram_free(cxt->fprz); fail_init_fprz: persistent_ram_free(cxt->cprz); fail_init_cprz: @@ -699,7 +881,6 @@ static int ramoops_remove(struct platform_device *pdev) cxt->pstore.bufsize = 0; persistent_ram_free(cxt->mprz); - persistent_ram_free(cxt->fprz); persistent_ram_free(cxt->cprz); ramoops_free_przs(cxt); @@ -741,6 +922,8 @@ static void ramoops_register_dummy(void) dummy_data->ftrace_size = ramoops_ftrace_size; dummy_data->pmsg_size = ramoops_pmsg_size; dummy_data->dump_oops = dump_oops; + dummy_data->flags = RAMOOPS_FLAG_FTRACE_PER_CPU; + /* * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC * (using 1 byte for ECC isn't much of use anyway). diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3975deec02f8..a857338b7dab 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -48,16 +48,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) return atomic_read(&prz->buffer->start); } -static DEFINE_RAW_SPINLOCK(buffer_lock); - /* increase and wrap the start pointer, returning the old value */ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) { int old; int new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->start); new = old + a; @@ -65,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) new -= prz->buffer_size; atomic_set(&prz->buffer->start, new); - raw_spin_unlock_irqrestore(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); return old; } @@ -75,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) { size_t old; size_t new; - unsigned long flags; + unsigned long flags = 0; - raw_spin_lock_irqsave(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_lock_irqsave(&prz->buffer_lock, flags); old = atomic_read(&prz->buffer->size); if (old == prz->buffer_size) @@ -89,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a) atomic_set(&prz->buffer->size, new); exit: - raw_spin_unlock_irqrestore(&buffer_lock, flags); + if (!(prz->flags & PRZ_FLAG_NO_LOCK)) + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags); } static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, @@ -465,7 +467,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, } static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, - struct persistent_ram_ecc_info *ecc_info) + struct persistent_ram_ecc_info *ecc_info, + unsigned long flags) { int ret; @@ -493,6 +496,8 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig, prz->buffer->sig = sig; persistent_ram_zap(prz); + prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + prz->flags = flags; return 0; } @@ -517,7 +522,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz) struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype) + unsigned int memtype, u32 flags) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -532,7 +537,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, if (ret) goto err; - ret = persistent_ram_post_init(prz, sig, ecc_info); + ret = persistent_ram_post_init(prz, sig, ecc_info, flags); if (ret) goto err; diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 8b252673d454..e99b1a72d9a7 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -12,14 +12,8 @@ static const struct genl_multicast_group quota_mcgrps[] = { }; /* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - /* - * Needed due to multicast group ID abuse - old code assumed - * the family ID was also a valid multicast group ID (which - * isn't true) and userspace might thus rely on it. Assign a - * static ID for this group to make dealing with that easier. - */ - .id = GENL_ID_VFS_DQUOT, +static struct genl_family quota_genl_family __ro_after_init = { + .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, |