From ae7871be189cb41184f1e05742b4a99e2c59774d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Dec 2016 14:28:41 +0100 Subject: swiotlb: Convert swiotlb_force from int to enum Convert the flag swiotlb_force from an int to an enum, to prepare for the advent of more possible values. Suggested-by: Konrad Rzeszutek Wilk Signed-off-by: Geert Uytterhoeven Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 7 ++++++- include/trace/events/swiotlb.h | 16 +++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 183f37c8a5e1..71d104e4c849 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -9,7 +9,12 @@ struct device; struct page; struct scatterlist; -extern int swiotlb_force; +enum swiotlb_force { + SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ + SWIOTLB_FORCE, /* swiotlb=force */ +}; + +extern enum swiotlb_force swiotlb_force; /* * Maximum allowable number of contiguous slabs to map, diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 7ea4c5e7c448..5e2e30a7efce 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -11,16 +11,16 @@ TRACE_EVENT(swiotlb_bounced, TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size, - int swiotlb_force), + enum swiotlb_force swiotlb_force), TP_ARGS(dev, dev_addr, size, swiotlb_force), TP_STRUCT__entry( - __string( dev_name, dev_name(dev) ) - __field( u64, dma_mask ) - __field( dma_addr_t, dev_addr ) - __field( size_t, size ) - __field( int, swiotlb_force ) + __string( dev_name, dev_name(dev) ) + __field( u64, dma_mask ) + __field( dma_addr_t, dev_addr ) + __field( size_t, size ) + __field( enum swiotlb_force, swiotlb_force ) ), TP_fast_assign( @@ -37,7 +37,9 @@ TRACE_EVENT(swiotlb_bounced, __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __entry->swiotlb_force ? "swiotlb_force" : "" ) + __print_symbolic(__entry->swiotlb_force, + { SWIOTLB_NORMAL, "NORMAL" }, + { SWIOTLB_FORCE, "FORCE" })) ); #endif /* _TRACE_SWIOTLB_H */ -- cgit v1.2.3-58-ga151 From fff5d99225107f5f13fe4a9805adc2a1c4b5fb00 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Dec 2016 14:28:42 +0100 Subject: swiotlb: Add swiotlb=noforce debug option On architectures like arm64, swiotlb is tied intimately to the core architecture DMA support. In addition, ZONE_DMA cannot be disabled. To aid debugging and catch devices not supporting DMA to memory outside the 32-bit address space, add a kernel command line option "swiotlb=noforce", which disables the use of bounce buffers. If specified, trying to map memory that cannot be used with DMA will fail, and a rate-limited warning will be printed. Note that io_tlb_nslabs is set to 1, which is the minimal supported value. Signed-off-by: Geert Uytterhoeven Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/admin-guide/kernel-parameters.txt | 3 ++- include/linux/swiotlb.h | 1 + include/trace/events/swiotlb.h | 3 ++- lib/swiotlb.c | 18 ++++++++++++++++-- 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be2d6d0a03a4..7d5f4f4cecfa 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3807,10 +3807,11 @@ it if 0 is given (See Documentation/cgroup-v1/memory.txt) swiotlb= [ARM,IA-64,PPC,MIPS,X86] - Format: { | force } + Format: { | force | noforce } -- Number of I/O TLB slabs force -- force using of bounce buffers even if they wouldn't be automatically used by the kernel + noforce -- Never use bounce buffers (for debugging) switches= [HW,M68k] diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 71d104e4c849..d9c84a9cde3d 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -12,6 +12,7 @@ struct scatterlist; enum swiotlb_force { SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ SWIOTLB_FORCE, /* swiotlb=force */ + SWIOTLB_NO_FORCE, /* swiotlb=noforce */ }; extern enum swiotlb_force swiotlb_force; diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 5e2e30a7efce..288c0c54a2b4 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -39,7 +39,8 @@ TRACE_EVENT(swiotlb_bounced, __entry->size, __print_symbolic(__entry->swiotlb_force, { SWIOTLB_NORMAL, "NORMAL" }, - { SWIOTLB_FORCE, "FORCE" })) + { SWIOTLB_FORCE, "FORCE" }, + { SWIOTLB_NO_FORCE, "NO_FORCE" })) ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a32dce6d5101..9def738af4f4 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -106,8 +106,12 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) + if (!strcmp(str, "force")) { swiotlb_force = SWIOTLB_FORCE; + } else if (!strcmp(str, "noforce")) { + swiotlb_force = SWIOTLB_NO_FORCE; + io_tlb_nslabs = 1; + } return 0; } @@ -543,8 +547,15 @@ static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); + dma_addr_t start_dma_addr; + + if (swiotlb_force == SWIOTLB_NO_FORCE) { + dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", + &phys); + return SWIOTLB_MAP_ERROR; + } + start_dma_addr = phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -721,6 +732,9 @@ static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, int do_panic) { + if (swiotlb_force == SWIOTLB_NO_FORCE) + return; + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. -- cgit v1.2.3-58-ga151 From 72c5296f9d64d8f5f27c2133e5f108a45a353d71 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 20 Dec 2016 14:38:14 -0700 Subject: genhd: remove dead and duplicated scsi code blk_scsi_cmd_filter use was deprecated by 4beab5c6 and the SCSI macros are duplicated in blkdev.h, both likely reintroduced by a bad merge from 540eed56. Signed-off-by: Jon Derrick Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e0341af6950e..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -146,15 +146,6 @@ enum { DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ }; -#define BLK_SCSI_MAX_CMDS (256) -#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) - -struct blk_scsi_cmd_filter { - unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; - unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; - struct kobject kobj; -}; - struct disk_part_tbl { struct rcu_head rcu_head; int len; -- cgit v1.2.3-58-ga151 From e3ba730702af370563f66cb610b71aa0ca67955e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Dec 2016 10:15:20 +0100 Subject: fsnotify: Remove fsnotify_duplicate_mark() There are only two calls sites of fsnotify_duplicate_mark(). Those are in kernel/audit_tree.c and both are bogus. Vfsmount pointer is unused for audit tree, inode pointer and group gets set in fsnotify_add_mark_locked() later anyway, mask and free_mark are already set in alloc_chunk(). In fact, calling fsnotify_duplicate_mark() is actively harmful because following fsnotify_add_mark_locked() will leak group reference by overwriting the group pointer. So just remove the two calls to fsnotify_duplicate_mark() and the function. Signed-off-by: Jan Kara [PM: line wrapping to fit in 80 chars] Signed-off-by: Paul Moore --- fs/notify/mark.c | 12 ------------ include/linux/fsnotify_backend.h | 2 -- kernel/audit_tree.c | 8 ++++---- 3 files changed, 4 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d3fea0bd89e2..6043306e8e21 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -510,18 +510,6 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) } } -void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) -{ - assert_spin_locked(&old->lock); - new->inode = old->inode; - new->mnt = old->mnt; - if (old->group) - fsnotify_get_group(old->group); - new->group = old->group; - new->mask = old->mask; - new->free_mark = old->free_mark; -} - /* * Nothing fancy, just initialize lists and locks and counters. */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7268ed076be8..ce77caa2bb10 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -324,8 +324,6 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(str extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); /* find (and take a reference) to a mark associated with group and vfsmount */ extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt); -/* copy the values from old into new */ -extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old); /* set the ignored_mask of a mark */ extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask); /* set the mask of a mark (might pin the object into memory */ diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 055f11b0a50f..b4b58400531f 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -258,8 +258,8 @@ static void untag_chunk(struct node *p) if (!new) goto Fallback; - fsnotify_duplicate_mark(&new->mark, entry); - if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.inode, NULL, 1)) { + if (fsnotify_add_mark(&new->mark, + entry->group, entry->inode, NULL, 1)) { fsnotify_put_mark(&new->mark); goto Fallback; } @@ -395,8 +395,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) return -ENOENT; } - fsnotify_duplicate_mark(chunk_entry, old_entry); - if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->inode, NULL, 1)) { + if (fsnotify_add_mark(chunk_entry, + old_entry->group, old_entry->inode, NULL, 1)) { spin_unlock(&old_entry->lock); fsnotify_put_mark(chunk_entry); fsnotify_put_mark(old_entry); -- cgit v1.2.3-58-ga151 From c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Aug 2016 17:22:44 +0200 Subject: mm: Invalidate DAX radix tree entries only if appropriate Currently invalidate_inode_pages2_range() and invalidate_mapping_pages() just delete all exceptional radix tree entries they find. For DAX this is not desirable as we track cache dirtiness in these entries and when they are evicted, we may not flush caches although it is necessary. This can for example manifest when we write to the same block both via mmap and via write(2) (to different offsets) and fsync(2) then does not properly flush CPU caches when modification via write(2) was the last one. Create appropriate DAX functions to handle invalidation of DAX entries for invalidate_inode_pages2_range() and invalidate_mapping_pages() and wire them up into the corresponding mm functions. Acked-by: Johannes Weiner Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 71 +++++++++++++++++++++++++++++++++++++++++++------- include/linux/dax.h | 3 +++ mm/truncate.c | 75 +++++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 125 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/dax.c b/fs/dax.c index a8732fbed381..bcfedd184860 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); } +static int __dax_invalidate_mapping_entry(struct address_space *mapping, + pgoff_t index, bool trunc) +{ + int ret = 0; + void *entry; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = get_unlocked_mapping_entry(mapping, index, NULL); + if (!entry || !radix_tree_exceptional_entry(entry)) + goto out; + if (!trunc && + (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) + goto out; + radix_tree_delete(page_tree, index); + mapping->nrexceptional--; + ret = 1; +out: + put_unlocked_mapping_entry(mapping, index, entry); + spin_unlock_irq(&mapping->tree_lock); + return ret; +} /* * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree * entry to get unlocked before deleting it. */ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) { - void *entry; + int ret = __dax_invalidate_mapping_entry(mapping, index, true); - spin_lock_irq(&mapping->tree_lock); - entry = get_unlocked_mapping_entry(mapping, index, NULL); /* * This gets called from truncate / punch_hole path. As such, the caller * must hold locks protecting against concurrent modifications of the @@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * caller has seen exceptional entry for this index, we better find it * at that index as well... */ - if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) { - spin_unlock_irq(&mapping->tree_lock); - return 0; - } - radix_tree_delete(&mapping->page_tree, index); + WARN_ON_ONCE(!ret); + return ret; +} + +/* + * Invalidate exceptional DAX entry if easily possible. This handles DAX + * entries for invalidate_inode_pages() so we evict the entry only if we can + * do so without blocking. + */ +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) +{ + int ret = 0; + void *entry, **slot; + struct radix_tree_root *page_tree = &mapping->page_tree; + + spin_lock_irq(&mapping->tree_lock); + entry = __radix_tree_lookup(page_tree, index, NULL, &slot); + if (!entry || !radix_tree_exceptional_entry(entry) || + slot_locked(mapping, slot)) + goto out; + if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || + radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) + goto out; + radix_tree_delete(page_tree, index); mapping->nrexceptional--; + ret = 1; +out: spin_unlock_irq(&mapping->tree_lock); - dax_wake_mapping_entry_waiter(mapping, index, entry, true); + if (ret) + dax_wake_mapping_entry_waiter(mapping, index, entry, true); + return ret; +} - return 1; +/* + * Invalidate exceptional DAX entry if it is clean. + */ +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index) +{ + return __dax_invalidate_mapping_entry(mapping, index, false); } /* diff --git a/include/linux/dax.h b/include/linux/dax.h index f97bcfe79472..24ad71173995 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); +int dax_invalidate_mapping_entry_sync(struct address_space *mapping, + pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); diff --git a/mm/truncate.c b/mm/truncate.c index fd97f1dbce29..dd7b24e083c5 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -24,20 +24,12 @@ #include #include "internal.h" -static void clear_exceptional_entry(struct address_space *mapping, - pgoff_t index, void *entry) +static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, + void *entry) { struct radix_tree_node *node; void **slot; - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) - return; - - if (dax_mapping(mapping)) { - dax_delete_mapping_entry(mapping, index); - return; - } spin_lock_irq(&mapping->tree_lock); /* * Regular page slots are stabilized by the page lock even @@ -55,6 +47,56 @@ unlock: spin_unlock_irq(&mapping->tree_lock); } +/* + * Unconditionally remove exceptional entry. Usually called from truncate path. + */ +static void truncate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return; + + if (dax_mapping(mapping)) { + dax_delete_mapping_entry(mapping, index); + return; + } + clear_shadow_entry(mapping, index, entry); +} + +/* + * Invalidate exceptional entry if easily possible. This handles exceptional + * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and + * clean entries. + */ +static int invalidate_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + +/* + * Invalidate exceptional entry if clean. This handles exceptional entries for + * invalidate_inode_pages2() so for DAX it evicts only clean entries. + */ +static int invalidate_exceptional_entry2(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return 1; + if (dax_mapping(mapping)) + return dax_invalidate_mapping_entry_sync(mapping, index); + clear_shadow_entry(mapping, index, entry); + return 1; +} + /** * do_invalidatepage - invalidate part or all of a page * @page: the page which is affected @@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping, } if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + truncate_exceptional_entry(mapping, index, + page); continue; } @@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + invalidate_exceptional_entry(mapping, index, + page); continue; } @@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, break; if (radix_tree_exceptional_entry(page)) { - clear_exceptional_entry(mapping, index, page); + if (!invalidate_exceptional_entry2(mapping, + index, page)) + ret = -EBUSY; continue; } -- cgit v1.2.3-58-ga151 From 56ab6b93007e5000a8812985aec1833c4a6a9ce0 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 25 Dec 2016 14:33:16 +0800 Subject: ipv4: Namespaceify tcp_tw_reuse knob Different namespaces might have different requirements to reuse TIME-WAIT sockets for new connections. This might be required in cases where different namespace applications are in place which require TIME_WAIT socket connections to be reduced independently of the host. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0cf5a1b777e..0378e88f6fd3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_tw_reuse; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 207147b4c6b2..6061963cca98 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..22cbd61079b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -432,13 +432,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &tcp_adv_win_scale_min, .extra2 = &tcp_adv_win_scale_max, }, - { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_frto", .data = &sysctl_tcp_frto, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tw_reuse", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f533ada..fe9da4fb96bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include #include -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: -- cgit v1.2.3-58-ga151 From be26727772cd86979255dfaf1eea967338dc0c9b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Dec 2016 10:49:54 +0800 Subject: net: xdp: remove unused bfp_warn_invalid_xdp_buffer() After commit 73b62bd085f4737679ea9afc7867fa5f99ba7d1b ("virtio-net: remove the warning before XDP linearizing"), there's no users for bpf_warn_invalid_xdp_buffer(), so remove it. This is a revert for commit f23bc46c30ca5ef58b8549434899fcbac41b2cfc. Cc: Daniel Borkmann Cc: John Fastabend Signed-off-by: Jason Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 1 - net/core/filter.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 702314253797..a0934e6c9bab 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_buffer(void); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/net/core/filter.c b/net/core/filter.c index 7190bd648154..b1461708a977 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -void bpf_warn_invalid_xdp_buffer(void) -{ - WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n"); -} -EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer); - static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, -- cgit v1.2.3-58-ga151 From be53e38f0df21c3d45cdf4cede37ee73554cdbb8 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Fri, 9 Dec 2016 15:28:31 +0900 Subject: dt-bindings: mfd: Remove TPS65217 interrupts Interrupt numbers are from the datasheet, so no need to keep them in the ABI. Use the number in the DT file. Signed-off-by: Milo Kim Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-bone-common.dtsi | 8 +++----- include/dt-bindings/mfd/tps65217.h | 26 -------------------------- 2 files changed, 3 insertions(+), 31 deletions(-) delete mode 100644 include/dt-bindings/mfd/tps65217.h (limited to 'include') diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index 14b62690c464..3e32dd18fd25 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -6,8 +6,6 @@ * published by the Free Software Foundation. */ -#include - / { cpus { cpu@0 { @@ -319,13 +317,13 @@ ti,pmic-shutdown-controller; charger { - interrupts = , ; - interrupt-names = "AC", "USB"; + interrupts = <0>, <1>; + interrupt-names = "USB", "AC"; status = "okay"; }; pwrbutton { - interrupts = ; + interrupts = <2>; status = "okay"; }; diff --git a/include/dt-bindings/mfd/tps65217.h b/include/dt-bindings/mfd/tps65217.h deleted file mode 100644 index cafb9e60cf12..000000000000 --- a/include/dt-bindings/mfd/tps65217.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This header provides macros for TI TPS65217 DT bindings. - * - * Copyright (C) 2016 Texas Instruments - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#ifndef __DT_BINDINGS_TPS65217_H__ -#define __DT_BINDINGS_TPS65217_H__ - -#define TPS65217_IRQ_USB 0 -#define TPS65217_IRQ_AC 1 -#define TPS65217_IRQ_PB 2 - -#endif -- cgit v1.2.3-58-ga151 From 1efbd205b3cc5882a8c386c58a57134044e9d5ba Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Dec 2016 14:58:39 +0200 Subject: Revert "net/mlx5: Add MPCNT register infrastructure" This reverts commit 7f503169cabd70c1f13b9279c50eca7dfb9a7d51. Fixes: 7f503169cabd ("net/mlx5: Add MPCNT register infrastructure") Signed-off-by: Gal Pressman Reported-by: Jesper Dangaard Brouer Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 5 --- include/linux/mlx5/driver.h | 1 - include/linux/mlx5/mlx5_ifc.h | 93 ------------------------------------------- 3 files changed, 99 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9f489365b3d3..52b437431c6a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1071,11 +1071,6 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; -enum { - MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, - MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, -}; - static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ae55361e674..735b36335f29 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -123,7 +123,6 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, - MLX5_REG_MPCNT = 0x9051, }; enum mlx5_dcbx_oper_mode { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57bec544e20a..a852e9db6f0d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; -struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 rx_errors[0x20]; - - u8 tx_errors[0x20]; - - u8 l0_to_recovery_eieos[0x20]; - - u8 l0_to_recovery_ts[0x20]; - - u8 l0_to_recovery_framing[0x20]; - - u8 l0_to_recovery_retrain[0x20]; - - u8 crc_error_dllp[0x20]; - - u8 crc_error_tlp[0x20]; - - u8 reserved_at_140[0x680]; -}; - -struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits { - u8 life_time_counter_high[0x20]; - - u8 life_time_counter_low[0x20]; - - u8 time_to_boot_image_start[0x20]; - - u8 time_to_link_image[0x20]; - - u8 calibration_time[0x20]; - - u8 time_to_first_perst[0x20]; - - u8 time_to_detect_state[0x20]; - - u8 time_to_l0[0x20]; - - u8 time_to_crs_en[0x20]; - - u8 time_to_plastic_image_start[0x20]; - - u8 time_to_iron_image_start[0x20]; - - u8 perst_handler[0x20]; - - u8 times_in_l1[0x20]; - - u8 times_in_l23[0x20]; - - u8 dl_down[0x20]; - - u8 config_cycle1usec[0x20]; - - u8 config_cycle2to7usec[0x20]; - - u8 config_cycle_8to15usec[0x20]; - - u8 config_cycle_16_to_63usec[0x20]; - - u8 config_cycle_64usec[0x20]; - - u8 correctable_err_msg_sent[0x20]; - - u8 non_fatal_err_msg_sent[0x20]; - - u8 fatal_err_msg_sent[0x20]; - - u8 reserved_at_2e0[0x4e0]; -}; - struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; -union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { - struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; - struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout; - u8 reserved_at_0[0x7c0]; -}; - union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -7320,18 +7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; -struct mlx5_ifc_mpcnt_reg_bits { - u8 reserved_at_0[0x8]; - u8 pcie_index[0x8]; - u8 reserved_at_10[0xa]; - u8 grp[0x6]; - - u8 clr[0x1]; - u8 reserved_at_21[0x1f]; - - union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; -}; - struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; - struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; -- cgit v1.2.3-58-ga151 From b91e1302ad9b80c174a4855533f7e3aa2873355e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 Dec 2016 11:40:38 -0800 Subject: mm: optimize PageWaiters bit use for unlock_page() In commit 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Nick Piggin made our page locking no longer unconditionally touch the hashed page waitqueue, which not only helps performance in general, but is particularly helpful on NUMA machines where the hashed wait queues can bounce around a lot. However, the "clear lock bit atomically and then test the waiters bit" sequence turns out to be much more expensive than it needs to be, because you get a nasty stall when trying to access the same word that just got updated atomically. On architectures where locking is done with LL/SC, this would be trivial to fix with a new primitive that clears one bit and tests another atomically, but that ends up not working on x86, where the only atomic operations that return the result end up being cmpxchg and xadd. The atomic bit operations return the old value of the same bit we changed, not the value of an unrelated bit. On x86, we could put the lock bit in the high bit of the byte, and use "xadd" with that bit (where the overflow ends up not touching other bits), and look at the other bits of the result. However, an even simpler model is to just use a regular atomic "and" to clear the lock bit, and then the sign bit in eflags will indicate the resulting state of the unrelated bit #7. So by moving the PageWaiters bit up to bit #7, we can atomically clear the lock bit and test the waiters bit on x86 too. And architectures with LL/SC (which is all the usual RISC suspects), the particular bit doesn't matter, so they are fine with this approach too. This avoids the extra access to the same atomic word, and thus avoids the costly stall at page unlock time. The only downside is that the interface ends up being a bit odd and specialized: clear a bit in a byte, and test the sign bit. Nick doesn't love the resulting name of the new primitive, but I'd rather make the name be descriptive and very clear about the limitation imposed by trying to work across all relevant architectures than make it be some generic thing that doesn't make the odd semantics explicit. So this introduces the new architecture primitive clear_bit_unlock_is_negative_byte(); and adds the trivial implementation for x86. We have a generic non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)" combination) which can be overridden by any architecture that can do better. According to Nick, Power has the same hickup x86 has, for example, but some other architectures may not even care. All these optimizations mean that my page locking stress-test (which is just executing a lot of small short-lived shell scripts: "make test" in the git source tree) no longer makes our page locking look horribly bad. Before all these optimizations, just the unlock_page() costs were just over 3% of all CPU overhead on "make test". After this, it's down to 0.66%, so just a quarter of the cost it used to be. (The difference on NUMA is bigger, but there this micro-optimization is likely less noticeable, since the big issue on NUMA was not the accesses to 'struct page', but the waitqueue accesses that were already removed by Nick's earlier commit). Acked-by: Nick Piggin Cc: Dave Hansen Cc: Bob Peterson Cc: Steven Whitehouse Cc: Andrew Lutomirski Cc: Andreas Gruenbacher Cc: Peter Zijlstra Cc: Mel Gorman Signed-off-by: Linus Torvalds --- arch/x86/include/asm/bitops.h | 13 +++++++++++++ include/linux/page-flags.h | 2 +- mm/filemap.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 68557f52b961..854022772c5b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -139,6 +139,19 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } +static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +{ + bool negative; + asm volatile(LOCK_PREFIX "andb %2,%1\n\t" + CC_SET(s) + : CC_OUT(s) (negative), ADDR + : "ir" ((char) ~(1 << nr)) : "memory"); + return negative; +} + +// Let everybody know we have it +#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte + /* * __clear_bit_unlock - Clears a bit in memory * @nr: Bit to clear diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c56b39890a41..6b5818d6de32 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -73,13 +73,13 @@ */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ - PG_waiters, /* Page has waiters, check its waitqueue */ PG_error, PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_active, + PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ PG_arch_1, diff --git a/mm/filemap.c b/mm/filemap.c index 82f26cde830c..6b1d96f86a9c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -912,6 +912,29 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) } EXPORT_SYMBOL_GPL(add_page_wait_queue); +#ifndef clear_bit_unlock_is_negative_byte + +/* + * PG_waiters is the high bit in the same byte as PG_lock. + * + * On x86 (and on many other architectures), we can clear PG_lock and + * test the sign bit at the same time. But if the architecture does + * not support that special operation, we just do this all by hand + * instead. + * + * The read of PG_waiters has to be after (or concurrently with) PG_locked + * being cleared, but a memory barrier should be unneccssary since it is + * in the same byte as PG_locked. + */ +static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) +{ + clear_bit_unlock(nr, mem); + /* smp_mb__after_atomic(); */ + return test_bit(PG_waiters); +} + +#endif + /** * unlock_page - unlock a locked page * @page: the page @@ -921,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). + * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ void unlock_page(struct page *page) { + BUILD_BUG_ON(PG_waiters != 7); page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); - clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_atomic(); - wake_up_page(page, PG_locked); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } EXPORT_SYMBOL(unlock_page); -- cgit v1.2.3-58-ga151 From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 29 Dec 2016 18:37:13 +0200 Subject: net/mlx4_core: Fix raw qp flow steering rules under SRIOV Demoting simple flow steering rule priority (for DPDK) was achieved by wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper function for the PF and all VFs. In function mlx4_ib_create_flow(), this change caused the main rule creation for the PF to be wrapped, while it left the associated tunnel steering rule creation unwrapped for the PF. This mismatch caused rule deletion failures in mlx4_ib_destroy_flow() for the PF when the detach wrapper function did not find the associated tunnel-steering rule (since creation of that rule for the PF did not go through the wrapper function). Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native" (so that the PF invocation does not go through the wrapper), and perform the required priority demotion for the PF in the mlx4_ib_create_flow() code path. Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 14 ++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 18 ++++++++++++++++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 22 +--------------------- include/linux/mlx4/device.h | 2 ++ 4 files changed, 33 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c8413fc120e6..7031a8dd4d14 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att size += ret; } + if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR && + flow_attr->num_of_specs == 1) { + struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1); + enum ib_flow_spec_type header_spec = + ((union ib_flow_spec *)(flow_attr + 1))->type; + + if (header_spec == IB_FLOW_SPEC_ETH) + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); + } + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) @@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", reg_id); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5e7840a7a33b..bffa6f345f2f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) } EXPORT_SYMBOL(mlx4_is_slave_active); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header) +{ + if (is_multicast_ether_addr(eth_header->eth.dst_mac) || + is_broadcast_ether_addr(eth_header->eth.dst_mac)) { + struct mlx4_net_trans_rule_hw_eth *eth = + (struct mlx4_net_trans_rule_hw_eth *)eth_header; + struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); + bool last_rule = next_rule->size == 0 && next_rule->id == 0 && + next_rule->rsvd == 0; + + if (last_rule) + ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); + } +} +EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio); + static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4b3e139e9c82..56185a0b827d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, return 0; } -static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, - struct _rule_hw *eth_header) -{ - if (is_multicast_ether_addr(eth_header->eth.dst_mac) || - is_broadcast_ether_addr(eth_header->eth.dst_mac)) { - struct mlx4_net_trans_rule_hw_eth *eth = - (struct mlx4_net_trans_rule_hw_eth *)eth_header; - struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1); - bool last_rule = next_rule->size == 0 && next_rule->id == 0 && - next_rule->rsvd == 0; - - if (last_rule) - ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC); - } -} - /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. @@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); if (header_id == MLX4_NET_TRANS_RULE_ID_ETH) - handle_eth_header_mcast_prio(ctrl, rule_header); - - if (slave == dev->caps.function) - goto execute; + mlx4_handle_eth_header_mcast_prio(ctrl, rule_header); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, goto err_put_qp; } -execute: err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 93bdb3485192..6533c16e27ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); +void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, + struct _rule_hw *eth_header); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -- cgit v1.2.3-58-ga151 From 42930553a7c11f06351bc08b889808d0f6020f08 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 30 Dec 2016 08:13:38 -0700 Subject: vfio-mdev: de-polute the namespace, rename parent_device & parent_ops Add an mdev_ prefix so we're not poluting the namespace so much. Cc: Zhenyu Wang Cc: Zhi Wang Cc: Jike Song Signed-off-by: Alex Williamson Reviewed by: Kirti Wankhede --- Documentation/vfio-mediated-device.txt | 24 ++++++++++++------------ drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/vfio/mdev/mdev_core.c | 28 ++++++++++++++-------------- drivers/vfio/mdev/mdev_private.h | 6 +++--- drivers/vfio/mdev/mdev_sysfs.c | 8 ++++---- drivers/vfio/mdev/vfio_mdev.c | 12 ++++++------ include/linux/mdev.h | 16 ++++++++-------- samples/vfio-mdev/mtty.c | 2 +- 8 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt index b38afec35edc..cfee106a3679 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/vfio-mediated-device.txt @@ -127,22 +127,22 @@ the VFIO when devices are unbound from the driver. Physical Device Driver Interface -------------------------------- -The physical device driver interface provides the parent_ops[3] structure to -define the APIs to manage work in the mediated core driver that is related to -the physical device. +The physical device driver interface provides the mdev_parent_ops[3] structure +to define the APIs to manage work in the mediated core driver that is related +to the physical device. -The structures in the parent_ops structure are as follows: +The structures in the mdev_parent_ops structure are as follows: * dev_attr_groups: attributes of the parent device * mdev_attr_groups: attributes of the mediated device * supported_config: attributes to define supported configurations -The functions in the parent_ops structure are as follows: +The functions in the mdev_parent_ops structure are as follows: * create: allocate basic resources in a driver for a mediated device * remove: free resources in a driver when a mediated device is destroyed -The callbacks in the parent_ops structure are as follows: +The callbacks in the mdev_parent_ops structure are as follows: * open: open callback of mediated device * close: close callback of mediated device @@ -151,14 +151,14 @@ The callbacks in the parent_ops structure are as follows: * write: write emulation callback * mmap: mmap emulation callback -A driver should use the parent_ops structure in the function call to register -itself with the mdev core driver: +A driver should use the mdev_parent_ops structure in the function call to +register itself with the mdev core driver: extern int mdev_register_device(struct device *dev, - const struct parent_ops *ops); + const struct mdev_parent_ops *ops); -However, the parent_ops structure is not required in the function call that a -driver should use to unregister itself with the mdev core driver: +However, the mdev_parent_ops structure is not required in the function call +that a driver should use to unregister itself with the mdev core driver: extern void mdev_unregister_device(struct device *dev); @@ -394,5 +394,5 @@ References [1] See Documentation/vfio.txt for more information on VFIO. [2] struct mdev_driver in include/linux/mdev.h -[3] struct parent_ops in include/linux/mdev.h +[3] struct mdev_parent_ops in include/linux/mdev.h [4] struct vfio_iommu_driver_ops in include/linux/vfio.h diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 4dd6722a7339..081ada238107 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1089,7 +1089,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return 0; } -static const struct parent_ops intel_vgpu_ops = { +static const struct mdev_parent_ops intel_vgpu_ops = { .supported_type_groups = intel_vgpu_type_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 6bb4d4c469ab..bf3b3b0b3d2b 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -45,7 +45,7 @@ static int _find_mdev_device(struct device *dev, void *data) return 0; } -static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid) +static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid) { struct device *dev; @@ -59,9 +59,9 @@ static bool mdev_device_exist(struct parent_device *parent, uuid_le uuid) } /* Should be called holding parent_list_lock */ -static struct parent_device *__find_parent_device(struct device *dev) +static struct mdev_parent *__find_parent_device(struct device *dev) { - struct parent_device *parent; + struct mdev_parent *parent; list_for_each_entry(parent, &parent_list, next) { if (parent->dev == dev) @@ -72,8 +72,8 @@ static struct parent_device *__find_parent_device(struct device *dev) static void mdev_release_parent(struct kref *kref) { - struct parent_device *parent = container_of(kref, struct parent_device, - ref); + struct mdev_parent *parent = container_of(kref, struct mdev_parent, + ref); struct device *dev = parent->dev; kfree(parent); @@ -81,7 +81,7 @@ static void mdev_release_parent(struct kref *kref) } static -inline struct parent_device *mdev_get_parent(struct parent_device *parent) +inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent) { if (parent) kref_get(&parent->ref); @@ -89,7 +89,7 @@ inline struct parent_device *mdev_get_parent(struct parent_device *parent) return parent; } -static inline void mdev_put_parent(struct parent_device *parent) +static inline void mdev_put_parent(struct mdev_parent *parent) { if (parent) kref_put(&parent->ref, mdev_release_parent); @@ -98,7 +98,7 @@ static inline void mdev_put_parent(struct parent_device *parent) static int mdev_device_create_ops(struct kobject *kobj, struct mdev_device *mdev) { - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; ret = parent->ops->create(kobj, mdev); @@ -125,7 +125,7 @@ static int mdev_device_create_ops(struct kobject *kobj, */ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove) { - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; /* @@ -156,10 +156,10 @@ static int mdev_device_remove_cb(struct device *dev, void *data) * Add device to list of registered parent devices. * Returns a negative value on error, otherwise 0. */ -int mdev_register_device(struct device *dev, const struct parent_ops *ops) +int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) { int ret; - struct parent_device *parent; + struct mdev_parent *parent; /* check for mandatory ops */ if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups) @@ -232,7 +232,7 @@ EXPORT_SYMBOL(mdev_register_device); void mdev_unregister_device(struct device *dev) { - struct parent_device *parent; + struct mdev_parent *parent; bool force_remove = true; mutex_lock(&parent_list_lock); @@ -269,7 +269,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) { int ret; struct mdev_device *mdev; - struct parent_device *parent; + struct mdev_parent *parent; struct mdev_type *type = to_mdev_type(kobj); parent = mdev_get_parent(type->parent); @@ -338,7 +338,7 @@ create_err: int mdev_device_remove(struct device *dev, bool force_remove) { struct mdev_device *mdev, *tmp; - struct parent_device *parent; + struct mdev_parent *parent; struct mdev_type *type; int ret; bool found = false; diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index d35097cbf3d7..0b72c2d9ee40 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -19,7 +19,7 @@ void mdev_bus_unregister(void); struct mdev_type { struct kobject kobj; struct kobject *devices_kobj; - struct parent_device *parent; + struct mdev_parent *parent; struct list_head next; struct attribute_group *group; }; @@ -29,8 +29,8 @@ struct mdev_type { #define to_mdev_type(_kobj) \ container_of(_kobj, struct mdev_type, kobj) -int parent_create_sysfs_files(struct parent_device *parent); -void parent_remove_sysfs_files(struct parent_device *parent); +int parent_create_sysfs_files(struct mdev_parent *parent); +void parent_remove_sysfs_files(struct mdev_parent *parent); int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type); void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type); diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 1a53deb2ee10..802df210929b 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -92,7 +92,7 @@ static struct kobj_type mdev_type_ktype = { .release = mdev_type_release, }; -struct mdev_type *add_mdev_supported_type(struct parent_device *parent, +struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent, struct attribute_group *group) { struct mdev_type *type; @@ -158,7 +158,7 @@ static void remove_mdev_supported_type(struct mdev_type *type) kobject_put(&type->kobj); } -static int add_mdev_supported_type_groups(struct parent_device *parent) +static int add_mdev_supported_type_groups(struct mdev_parent *parent) { int i; @@ -183,7 +183,7 @@ static int add_mdev_supported_type_groups(struct parent_device *parent) } /* mdev sysfs functions */ -void parent_remove_sysfs_files(struct parent_device *parent) +void parent_remove_sysfs_files(struct mdev_parent *parent) { struct mdev_type *type, *tmp; @@ -196,7 +196,7 @@ void parent_remove_sysfs_files(struct parent_device *parent) kset_unregister(parent->mdev_types_kset); } -int parent_create_sysfs_files(struct parent_device *parent) +int parent_create_sysfs_files(struct mdev_parent *parent) { int ret; diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c index ffc36758cb84..fa848a701b8b 100644 --- a/drivers/vfio/mdev/vfio_mdev.c +++ b/drivers/vfio/mdev/vfio_mdev.c @@ -27,7 +27,7 @@ static int vfio_mdev_open(void *device_data) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; int ret; if (unlikely(!parent->ops->open)) @@ -46,7 +46,7 @@ static int vfio_mdev_open(void *device_data) static void vfio_mdev_release(void *device_data) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (likely(parent->ops->release)) parent->ops->release(mdev); @@ -58,7 +58,7 @@ static long vfio_mdev_unlocked_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->ioctl)) return -EINVAL; @@ -70,7 +70,7 @@ static ssize_t vfio_mdev_read(void *device_data, char __user *buf, size_t count, loff_t *ppos) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->read)) return -EINVAL; @@ -82,7 +82,7 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, size_t count, loff_t *ppos) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->write)) return -EINVAL; @@ -93,7 +93,7 @@ static ssize_t vfio_mdev_write(void *device_data, const char __user *buf, static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma) { struct mdev_device *mdev = device_data; - struct parent_device *parent = mdev->parent; + struct mdev_parent *parent = mdev->parent; if (unlikely(!parent->ops->mmap)) return -EINVAL; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index ec819e9a115a..853bb78e5866 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -14,9 +14,9 @@ #define MDEV_H /* Parent device */ -struct parent_device { - struct device *dev; - const struct parent_ops *ops; +struct mdev_parent { + struct device *dev; + const struct mdev_parent_ops *ops; /* internal */ struct kref ref; @@ -29,7 +29,7 @@ struct parent_device { /* Mediated device */ struct mdev_device { struct device dev; - struct parent_device *parent; + struct mdev_parent *parent; uuid_le uuid; void *driver_data; @@ -40,7 +40,7 @@ struct mdev_device { }; /** - * struct parent_ops - Structure to be registered for each parent device to + * struct mdev_parent_ops - Structure to be registered for each parent device to * register the device to mdev module. * * @owner: The module owner. @@ -86,10 +86,10 @@ struct mdev_device { * @mdev: mediated device structure * @vma: vma structure * Parent device that support mediated device should be registered with mdev - * module with parent_ops structure. + * module with mdev_parent_ops structure. **/ -struct parent_ops { +struct mdev_parent_ops { struct module *owner; const struct attribute_group **dev_attr_groups; const struct attribute_group **mdev_attr_groups; @@ -159,7 +159,7 @@ extern struct bus_type mdev_bus_type; #define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) extern int mdev_register_device(struct device *dev, - const struct parent_ops *ops); + const struct mdev_parent_ops *ops); extern void mdev_unregister_device(struct device *dev); extern int mdev_register_driver(struct mdev_driver *drv, struct module *owner); diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 6b633a4ea333..1a74f0e488da 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -1402,7 +1402,7 @@ struct attribute_group *mdev_type_groups[] = { NULL, }; -struct parent_ops mdev_fops = { +struct mdev_parent_ops mdev_fops = { .owner = THIS_MODULE, .dev_attr_groups = mtty_dev_groups, .mdev_attr_groups = mdev_dev_groups, -- cgit v1.2.3-58-ga151 From 9372e6feaafb65d88f667ffb5b7b425f8568344f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 30 Dec 2016 08:13:41 -0700 Subject: vfio-mdev: Make mdev_parent private Rather than hoping for good behavior by marking some elements internal, enforce it by making the entire structure private and creating an accessor function for the one useful external field. Cc: Zhenyu Wang Cc: Zhi Wang Cc: Jike Song Signed-off-by: Alex Williamson Reviewed by: Kirti Wankhede --- Documentation/vfio-mediated-device.txt | 3 +++ drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/vfio/mdev/mdev_core.c | 6 ++++++ drivers/vfio/mdev/mdev_private.h | 10 ++++++++++ include/linux/mdev.h | 15 ++------------- samples/vfio-mdev/mtty.c | 2 +- 6 files changed, 23 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt index cfee106a3679..d226c7a5ba8b 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/vfio-mediated-device.txt @@ -223,6 +223,9 @@ Directories and files under the sysfs for Each Physical Device sprintf(buf, "%s-%s", dev_driver_string(parent->dev), group->name); + (or using mdev_parent_dev(mdev) to arrive at the parent device outside + of the core mdev code) + * device_api This attribute should show which device API is being created, for example, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 081ada238107..38500329aa92 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -396,7 +396,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) struct device *pdev; void *gvt; - pdev = mdev->parent->dev; + pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index bf3b3b0b3d2b..30d05304241e 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -30,6 +30,12 @@ static struct class_compat *mdev_bus_compat_class; static LIST_HEAD(mdev_list); static DEFINE_MUTEX(mdev_list_lock); +struct device *mdev_parent_dev(struct mdev_device *mdev) +{ + return mdev->parent->dev; +} +EXPORT_SYMBOL(mdev_parent_dev); + static int _find_mdev_device(struct device *dev, void *data) { struct mdev_device *mdev; diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index 0b72c2d9ee40..b05dd22fc9a6 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -16,6 +16,16 @@ int mdev_bus_register(void); void mdev_bus_unregister(void); +struct mdev_parent { + struct device *dev; + const struct mdev_parent_ops *ops; + struct kref ref; + struct mutex lock; + struct list_head next; + struct kset *mdev_types_kset; + struct list_head type_list; +}; + struct mdev_type { struct kobject kobj; struct kobject *devices_kobj; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 853bb78e5866..f586222b6c25 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -13,19 +13,6 @@ #ifndef MDEV_H #define MDEV_H -/* Parent device */ -struct mdev_parent { - struct device *dev; - const struct mdev_parent_ops *ops; - - /* internal */ - struct kref ref; - struct mutex lock; - struct list_head next; - struct kset *mdev_types_kset; - struct list_head type_list; -}; - /* Mediated device */ struct mdev_device { struct device dev; @@ -165,4 +152,6 @@ extern void mdev_unregister_device(struct device *dev); extern int mdev_register_driver(struct mdev_driver *drv, struct module *owner); extern void mdev_unregister_driver(struct mdev_driver *drv); +extern struct device *mdev_parent_dev(struct mdev_device *mdev); + #endif /* MDEV_H */ diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 1a74f0e488da..5e13efc62bad 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -734,7 +734,7 @@ int mtty_create(struct kobject *kobj, struct mdev_device *mdev) for (i = 0; i < 2; i++) { snprintf(name, MTTY_STRING_LEN, "%s-%d", - dev_driver_string(mdev->parent->dev), i + 1); + dev_driver_string(mdev_parent_dev(mdev)), i + 1); if (!strcmp(kobj->name, name)) { nr_ports = i + 1; break; -- cgit v1.2.3-58-ga151 From 99e3123e3d72616a829dad6d25aa005ef1ef9b13 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 30 Dec 2016 08:13:44 -0700 Subject: vfio-mdev: Make mdev_device private and abstract interfaces Abstract access to mdev_device so that we can define which interfaces are public rather than relying on comments in the structure. Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Alex Williamson Reviewed-by: Jike Song Reviewed by: Kirti Wankhede --- drivers/gpu/drm/i915/gvt/kvmgt.c | 18 +++++++++--------- drivers/vfio/mdev/mdev_core.c | 30 ++++++++++++++++++++++++++++++ drivers/vfio/mdev/mdev_private.h | 13 +++++++++++++ include/linux/mdev.h | 31 ++++++------------------------- samples/vfio-mdev/mtty.c | 24 +++++++++++++----------- 5 files changed, 71 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 38500329aa92..f8021a01df63 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -166,7 +166,7 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) { - struct device *dev = &vgpu->vdev.mdev->dev; + struct device *dev = mdev_dev(vgpu->vdev.mdev); struct gvt_dma *this; unsigned long g1; int rc; @@ -195,7 +195,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct device *dev = &vgpu->vdev.mdev->dev; + struct device *dev = mdev_dev(vgpu->vdev.mdev); unsigned long gfn; mutex_lock(&vgpu->vdev.cache_lock); @@ -418,7 +418,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) mdev_set_drvdata(mdev, vgpu); gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", - dev_name(&mdev->dev)); + dev_name(mdev_dev(mdev))); return 0; } @@ -482,7 +482,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, &events, + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, &vgpu->vdev.iommu_notifier); if (ret != 0) { gvt_err("vfio_register_notifier for iommu failed: %d\n", ret); @@ -490,7 +490,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) } events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(&mdev->dev, VFIO_GROUP_NOTIFY, &events, + ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, &vgpu->vdev.group_notifier); if (ret != 0) { gvt_err("vfio_register_notifier for group failed: %d\n", ret); @@ -500,7 +500,7 @@ static int intel_vgpu_open(struct mdev_device *mdev) return kvmgt_guest_init(mdev); undo_iommu: - vfio_unregister_notifier(&mdev->dev, VFIO_IOMMU_NOTIFY, + vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); out: return ret; @@ -513,9 +513,9 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) if (!handle_valid(vgpu->handle)) return; - vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_IOMMU_NOTIFY, + vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); - vfio_unregister_notifier(&vgpu->vdev.mdev->dev, VFIO_GROUP_NOTIFY, + vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY, &vgpu->vdev.group_notifier); info = (struct kvmgt_guest_info *)vgpu->handle; @@ -1372,7 +1372,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) return pfn; pfn = INTEL_GVT_INVALID_ADDR; - dev = &info->vgpu->vdev.mdev->dev; + dev = mdev_dev(info->vgpu->vdev.mdev); rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (rc != 1) { gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc); diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 30d05304241e..36d75c367d22 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -36,6 +36,36 @@ struct device *mdev_parent_dev(struct mdev_device *mdev) } EXPORT_SYMBOL(mdev_parent_dev); +void *mdev_get_drvdata(struct mdev_device *mdev) +{ + return mdev->driver_data; +} +EXPORT_SYMBOL(mdev_get_drvdata); + +void mdev_set_drvdata(struct mdev_device *mdev, void *data) +{ + mdev->driver_data = data; +} +EXPORT_SYMBOL(mdev_set_drvdata); + +struct device *mdev_dev(struct mdev_device *mdev) +{ + return &mdev->dev; +} +EXPORT_SYMBOL(mdev_dev); + +struct mdev_device *mdev_from_dev(struct device *dev) +{ + return dev_is_mdev(dev) ? to_mdev_device(dev) : NULL; +} +EXPORT_SYMBOL(mdev_from_dev); + +uuid_le mdev_uuid(struct mdev_device *mdev) +{ + return mdev->uuid; +} +EXPORT_SYMBOL(mdev_uuid); + static int _find_mdev_device(struct device *dev, void *data) { struct mdev_device *mdev; diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index b05dd22fc9a6..a9cefd70a705 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -26,6 +26,19 @@ struct mdev_parent { struct list_head type_list; }; +struct mdev_device { + struct device dev; + struct mdev_parent *parent; + uuid_le uuid; + void *driver_data; + struct kref ref; + struct list_head next; + struct kobject *type_kobj; +}; + +#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) +#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) + struct mdev_type { struct kobject kobj; struct kobject *devices_kobj; diff --git a/include/linux/mdev.h b/include/linux/mdev.h index f586222b6c25..3ee44b8d2bb3 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -13,18 +13,7 @@ #ifndef MDEV_H #define MDEV_H -/* Mediated device */ -struct mdev_device { - struct device dev; - struct mdev_parent *parent; - uuid_le uuid; - void *driver_data; - - /* internal */ - struct kref ref; - struct list_head next; - struct kobject *type_kobj; -}; +struct mdev_device; /** * struct mdev_parent_ops - Structure to be registered for each parent device to @@ -75,7 +64,6 @@ struct mdev_device { * Parent device that support mediated device should be registered with mdev * module with mdev_parent_ops structure. **/ - struct mdev_parent_ops { struct module *owner; const struct attribute_group **dev_attr_groups; @@ -129,22 +117,13 @@ struct mdev_driver { }; #define to_mdev_driver(drv) container_of(drv, struct mdev_driver, driver) -#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) -static inline void *mdev_get_drvdata(struct mdev_device *mdev) -{ - return mdev->driver_data; -} - -static inline void mdev_set_drvdata(struct mdev_device *mdev, void *data) -{ - mdev->driver_data = data; -} +extern void *mdev_get_drvdata(struct mdev_device *mdev); +extern void mdev_set_drvdata(struct mdev_device *mdev, void *data); +extern uuid_le mdev_uuid(struct mdev_device *mdev); extern struct bus_type mdev_bus_type; -#define dev_is_mdev(d) ((d)->bus == &mdev_bus_type) - extern int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops); extern void mdev_unregister_device(struct device *dev); @@ -153,5 +132,7 @@ extern int mdev_register_driver(struct mdev_driver *drv, struct module *owner); extern void mdev_unregister_driver(struct mdev_driver *drv); extern struct device *mdev_parent_dev(struct mdev_device *mdev); +extern struct device *mdev_dev(struct mdev_device *mdev); +extern struct mdev_device *mdev_from_dev(struct device *dev); #endif /* MDEV_H */ diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 5e13efc62bad..919c10d5b12e 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -164,7 +164,7 @@ static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) struct mdev_state *mds; list_for_each_entry(mds, &mdev_devices_list, next) { - if (uuid_le_cmp(mds->mdev->uuid, uuid) == 0) + if (uuid_le_cmp(mdev_uuid(mds->mdev), uuid) == 0) return mds; } @@ -341,7 +341,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, pr_err("Serial port %d: Fifo level trigger\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } } else { #if defined(DEBUG_INTR) @@ -355,7 +356,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, */ if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_RLSI) - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); break; @@ -374,7 +376,8 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, pr_err("Serial port %d: IER_THRI write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); @@ -445,7 +448,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, #if defined(DEBUG_INTR) pr_err("Serial port %d: MCR_OUT2 write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); } if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && @@ -453,7 +456,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, #if defined(DEBUG_INTR) pr_err("Serial port %d: MCR RTS/DTR write\n", index); #endif - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev)); } break; @@ -504,7 +507,8 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, #endif if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_THRI) - mtty_trigger_interrupt(mdev_state->mdev->uuid); + mtty_trigger_interrupt( + mdev_uuid(mdev_state->mdev)); } mutex_unlock(&mdev_state->rxtx_lock); @@ -1298,10 +1302,8 @@ static ssize_t sample_mdev_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = to_mdev_device(dev); - - if (mdev) - return sprintf(buf, "This is MDEV %s\n", dev_name(&mdev->dev)); + if (mdev_from_dev(dev)) + return sprintf(buf, "This is MDEV %s\n", dev_name(dev)); return sprintf(buf, "\n"); } -- cgit v1.2.3-58-ga151 From 65e4345c8ef8811bbb4860fe5f2df10646b7f2e1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Dec 2016 23:54:18 +0100 Subject: iio: accel: st_accel: fix LIS3LV02 reading and scaling The LIS3LV02 has a special bit that need to be set to get the read values left aligned. Before this patch we get gibberish like this: iio_generic_buffer -a -c10 -n lis3lv02dl_accel (...) 0.000000 -0.010042 -0.642688 19155832931907 0.000000 -0.010042 -0.642688 19155858751073 Which is because we read a raw value for 1g as 64 which is the nominal 1024 for 1g shifted 4 bits to the left by being right-aligned rather than left aligned. Since all other sensors are left aligned, add some code to set the special DAS (data alignment setting) bit to 1 so that the right value is now read like this: iio_generic_buffer -a -c10 -n lis3lv02dl_accel (...) 0.000000 -0.147095 -10.120135 24761614364956 -0.029419 -0.176514 -10.120135 24761631624540 The scaling was weird as well: we have a gain of 1000 for 1g and 3000 for 6g. I don't even remember how I came up with the old values but they are wrong. Fixes: 3acddf74f807 ("iio: st-sensors: add support for lis3lv02d accelerometer") Cc: Lorenzo Bianconi Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 12 ++++++++++-- drivers/iio/common/st_sensors/st_sensors_core.c | 9 +++++++++ include/linux/iio/common/st_sensors.h | 12 ++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index f6b6d42385e1..784670e2736b 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -353,12 +353,12 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { [0] = { .num = ST_ACCEL_FS_AVL_2G, .value = 0x00, - .gain = IIO_G_TO_M_S_2(1024), + .gain = IIO_G_TO_M_S_2(1000), }, [1] = { .num = ST_ACCEL_FS_AVL_6G, .value = 0x01, - .gain = IIO_G_TO_M_S_2(340), + .gain = IIO_G_TO_M_S_2(3000), }, }, }, @@ -366,6 +366,14 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .addr = 0x21, .mask = 0x40, }, + /* + * Data Alignment Setting - needs to be set to get + * left-justified data like all other sensors. + */ + .das = { + .addr = 0x21, + .mask = 0x01, + }, .drdy_irq = { .addr = 0x21, .mask_int1 = 0x04, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index d5cf7f31eaf9..79c8c7cd70d5 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -401,6 +401,15 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, return err; } + /* set DAS */ + if (sdata->sensor_settings->das.addr) { + err = st_sensors_write_data_with_mask(indio_dev, + sdata->sensor_settings->das.addr, + sdata->sensor_settings->das.mask, 1); + if (err < 0) + return err; + } + if (sdata->int_pin_open_drain) { dev_info(&indio_dev->dev, "set interrupt line to open drain mode\n"); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 228bd44efa4c..497f2b3a5a62 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -115,6 +115,16 @@ struct st_sensor_bdu { u8 mask; }; +/** + * struct st_sensor_das - ST sensor device data alignment selection + * @addr: address of the register. + * @mask: mask to write the das flag for left alignment. + */ +struct st_sensor_das { + u8 addr; + u8 mask; +}; + /** * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt * @addr: address of the register. @@ -185,6 +195,7 @@ struct st_sensor_transfer_function { * @enable_axis: Enable one or more axis of the sensor. * @fs: Full scale register and full scale list available. * @bdu: Block data update register. + * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. @@ -200,6 +211,7 @@ struct st_sensor_settings { struct st_sensor_axis enable_axis; struct st_sensor_fullscale fs; struct st_sensor_bdu bdu; + struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; bool multi_read_bit; unsigned int bootime; -- cgit v1.2.3-58-ga151 From 96a420d2d37cc019d0fbb95c9f0e965fa1080e1f Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Thu, 15 Dec 2016 12:47:41 +0000 Subject: usb: gadget: f_fs: Document eventfd effect on descriptor format. When FUNCTIONFS_EVENTFD flag is set, __ffs_data_got_descs reads a 32bits, little-endian value right after the fixed structure header, and passes it to eventfd_ctx_fdget. Document this. Also, rephrase a comment to be affirmative about the role of string descriptor at index 0. Ref: USB 2.0 spec paragraph "9.6.7 String", and also checked to still be current in USB 3.0 spec paragraph "9.6.9 String". Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 4 ++-- include/uapi/linux/usb/functionfs.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index aab3fc1dbb94..818f4997c1ac 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2091,8 +2091,8 @@ static int __ffs_data_do_entity(enum ffs_entity_type type, case FFS_STRING: /* - * Strings are indexed from 1 (0 is magic ;) reserved - * for languages list or some such) + * Strings are indexed from 1 (0 is reserved + * for languages list) */ if (*valuep > helper->ffs->strings_count) helper->ffs->strings_count = *valuep; diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index acc63697a0cc..b2a31a55a612 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -93,6 +93,7 @@ struct usb_ext_prop_desc { * | 0 | magic | LE32 | FUNCTIONFS_DESCRIPTORS_MAGIC_V2 | * | 4 | length | LE32 | length of the whole data chunk | * | 8 | flags | LE32 | combination of functionfs_flags | + * | | eventfd | LE32 | eventfd file descriptor | * | | fs_count | LE32 | number of full-speed descriptors | * | | hs_count | LE32 | number of high-speed descriptors | * | | ss_count | LE32 | number of super-speed descriptors | -- cgit v1.2.3-58-ga151 From c6ef7fd40eddad38a8825cbd6bb2ce8bdbba88f5 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 4 Jan 2017 15:08:15 -0500 Subject: vfio-mdev: fix non-standard ioctl return val causing i386 build fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit What appears to be a copy and paste error from the line above gets the ioctl a ssize_t return value instead of the traditional "int". The associated sample code used "long" which meant it would compile for x86-64 but not i386, with the latter failing as follows: CC [M] samples/vfio-mdev/mtty.o samples/vfio-mdev/mtty.c:1418:20: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .ioctl = mtty_ioctl, ^ samples/vfio-mdev/mtty.c:1418:20: note: (near initialization for ‘mdev_fops.ioctl’) cc1: some warnings being treated as errors Since in this case, vfio is working with struct file_operations; as such: long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); ...and so here we just standardize on long vs. the normal int that user space typically sees and documents as per "man ioctl" and similar. Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.") Cc: Kirti Wankhede Cc: Neo Jia Cc: kvm@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Alex Williamson --- include/linux/mdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 3ee44b8d2bb3..b6e048e1045f 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -78,7 +78,7 @@ struct mdev_parent_ops { size_t count, loff_t *ppos); ssize_t (*write)(struct mdev_device *mdev, const char __user *buf, size_t count, loff_t *ppos); - ssize_t (*ioctl)(struct mdev_device *mdev, unsigned int cmd, + long (*ioctl)(struct mdev_device *mdev, unsigned int cmd, unsigned long arg); int (*mmap)(struct mdev_device *mdev, struct vm_area_struct *vma); }; -- cgit v1.2.3-58-ga151 From c7858bf16c0b2cc62f475f31e6df28c3a68da1d6 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 3 Jan 2017 13:49:42 +0100 Subject: asm-prototypes: Clear any CPP defines before declaring the functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The asm-prototypes.h file is used to provide dummy function declarations for genksyms, when processing asm files with EXPORT_SYMBOL. Make sure that any architecture defines get out of our way. x86 currently has an issue with memcpy on 64bit with CONFIG_KMEMCHECK=y and with memset/__memset on 32bit: $ cat init/test.c #include $ make -s init/test.o In file included from ./arch/x86/include/asm/string.h:4:0, from ./include/linux/string.h:18, from ./include/linux/bitmap.h:8, from ./include/linux/cpumask.h:11, from ./arch/x86/include/asm/cpumask.h:4, from ./arch/x86/include/asm/msr.h:10, from ./arch/x86/include/asm/processor.h:20, from ./arch/x86/include/asm/cpufeature.h:4, from ./arch/x86/include/asm/thread_info.h:52, from ./include/linux/thread_info.h:25, from ./arch/x86/include/asm/preempt.h:6, from ./include/linux/preempt.h:59, from ./include/linux/spinlock.h:50, from ./include/linux/seqlock.h:35, from ./include/linux/time.h:5, from ./include/uapi/linux/timex.h:56, from ./include/linux/timex.h:56, from ./include/linux/sched.h:19, from ./include/linux/uaccess.h:4, from ./arch/x86/include/asm/asm-prototypes.h:2, from init/test.c:1: ./arch/x86/include/asm/string_64.h:52:47: error: expected declaration specifiers or ‘...’ before ‘(’ token #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) ./include/asm-generic/asm-prototypes.h:6:14: note: in expansion of macro ‘memcpy’ extern void *memcpy(void *, const void *, __kernel_size_t); ^ ... During real build, this manifests itself by genksyms segfaulting. Fixes: 334bb7738764 ("x86/kbuild: enable modversions for symbols exported from asm") Reported-and-tested-by: Borislav Petkov Cc: Adam Borowski Signed-off-by: Michal Marek --- include/asm-generic/asm-prototypes.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h index df13637e4017..939869c772b1 100644 --- a/include/asm-generic/asm-prototypes.h +++ b/include/asm-generic/asm-prototypes.h @@ -1,7 +1,13 @@ #include +#undef __memset extern void *__memset(void *, int, __kernel_size_t); +#undef __memcpy extern void *__memcpy(void *, const void *, __kernel_size_t); +#undef __memmove extern void *__memmove(void *, const void *, __kernel_size_t); +#undef memset extern void *memset(void *, int, __kernel_size_t); +#undef memcpy extern void *memcpy(void *, const void *, __kernel_size_t); +#undef memmove extern void *memmove(void *, const void *, __kernel_size_t); -- cgit v1.2.3-58-ga151 From 7453c549f5f6485c0d79cad7844870dcc7d1b34d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 20 Dec 2016 10:02:02 -0500 Subject: swiotlb: Export swiotlb_max_segment to users So they can figure out what is the optimal number of pages that can be contingously stitched together without fear of bounce buffer. We also expose an mechanism for sub-users of SWIOTLB API, such as Xen-SWIOTLB to set the max segment value. And lastly if swiotlb=force is set (which mandates we bounce buffer everything) we set max_segment so at least we can bounce buffer one 4K page instead of a giant 512KB one for which we may not have space. Signed-off-by: Konrad Rzeszutek Wilk Reported-and-Tested-by: Juergen Gross --- drivers/gpu/drm/i915/i915_gem.c | 11 +---------- drivers/xen/swiotlb-xen.c | 4 ++++ include/linux/swiotlb.h | 3 +++ lib/swiotlb.c | 26 ++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d0dcaf35b429..115fa399608c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2290,15 +2290,6 @@ unlock: mutex_unlock(&obj->mm.lock); } -static unsigned int swiotlb_max_size(void) -{ -#if IS_ENABLED(CONFIG_SWIOTLB) - return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE); -#else - return 0; -#endif -} - static void i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; @@ -2345,7 +2336,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); - max_segment = swiotlb_max_size(); + max_segment = swiotlb_max_segment(); if (!max_segment) max_segment = rounddown(UINT_MAX, PAGE_SIZE); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index aba12009422e..f905d6eeb048 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -275,6 +275,10 @@ retry: rc = 0; } else rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); + + if (!rc) + swiotlb_set_max_segment(PAGE_SIZE); + return rc; error: if (repeat--) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index d9c84a9cde3d..4ee479f2f355 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -114,11 +114,14 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask); #ifdef CONFIG_SWIOTLB extern void __init swiotlb_free(void); +unsigned int swiotlb_max_segment(void); #else static inline void swiotlb_free(void) { } +static inline unsigned int swiotlb_max_segment(void) { return 0; } #endif extern void swiotlb_print_info(void); extern int is_swiotlb_buffer(phys_addr_t paddr); +extern void swiotlb_set_max_segment(unsigned int); #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 9def738af4f4..975b8fc4f1e1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -82,6 +82,12 @@ static phys_addr_t io_tlb_overflow_buffer; static unsigned int *io_tlb_list; static unsigned int io_tlb_index; +/* + * Max segment that we can provide which (if pages are contingous) will + * not be bounced (unless SWIOTLB_FORCE is set). + */ +unsigned int max_segment; + /* * We need to save away the original address corresponding to a mapped entry * for the sync operations. @@ -124,6 +130,20 @@ unsigned long swiotlb_nr_tbl(void) } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); +unsigned int swiotlb_max_segment(void) +{ + return max_segment; +} +EXPORT_SYMBOL_GPL(swiotlb_max_segment); + +void swiotlb_set_max_segment(unsigned int val) +{ + if (swiotlb_force == SWIOTLB_FORCE) + max_segment = 1; + else + max_segment = rounddown(val, PAGE_SIZE); +} + /* default to 64MB */ #define IO_TLB_DEFAULT_SIZE (64UL<<20) unsigned long swiotlb_size_or_default(void) @@ -205,6 +225,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) if (verbose) swiotlb_print_info(); + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); return 0; } @@ -283,6 +304,7 @@ swiotlb_late_init_with_default_size(size_t default_size) rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); if (rc) free_pages((unsigned long)vstart, order); + return rc; } @@ -337,6 +359,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) late_alloc = 1; + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); + return 0; cleanup4: @@ -351,6 +375,7 @@ cleanup2: io_tlb_end = 0; io_tlb_start = 0; io_tlb_nslabs = 0; + max_segment = 0; return -ENOMEM; } @@ -379,6 +404,7 @@ void __init swiotlb_free(void) PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; + max_segment = 0; } int is_swiotlb_buffer(phys_addr_t paddr) -- cgit v1.2.3-58-ga151 From ea07b862ac8ef9b8c8358517d2e39f847dda6659 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 6 Jan 2017 19:21:43 -0500 Subject: mm: workingset: fix use-after-free in shadow node shrinker Several people report seeing warnings about inconsistent radix tree nodes followed by crashes in the workingset code, which all looked like use-after-free access from the shadow node shrinker. Dave Jones managed to reproduce the issue with a debug patch applied, which confirmed that the radix tree shrinking indeed frees shadow nodes while they are still linked to the shadow LRU: WARNING: CPU: 2 PID: 53 at lib/radix-tree.c:643 delete_node+0x1e4/0x200 CPU: 2 PID: 53 Comm: kswapd0 Not tainted 4.10.0-rc2-think+ #3 Call Trace: delete_node+0x1e4/0x200 __radix_tree_delete_node+0xd/0x10 shadow_lru_isolate+0xe6/0x220 __list_lru_walk_one.isra.4+0x9b/0x190 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x2e/0x40 shrink_slab.part.44+0x23d/0x5d0 shrink_node+0x22c/0x330 kswapd+0x392/0x8f0 This is the WARN_ON_ONCE(!list_empty(&node->private_list)) placed in the inlined radix_tree_shrink(). The problem is with 14b468791fa9 ("mm: workingset: move shadow entry tracking to radix tree exceptional tracking"), which passes an update callback into the radix tree to link and unlink shadow leaf nodes when tree entries change, but forgot to pass the callback when reclaiming a shadow node. While the reclaimed shadow node itself is unlinked by the shrinker, its deletion from the tree can cause the left-most leaf node in the tree to be shrunk. If that happens to be a shadow node as well, we don't unlink it from the LRU as we should. Consider this tree, where the s are shadow entries: root->rnode | [0 n] | | [s ] [sssss] Now the shadow node shrinker reclaims the rightmost leaf node through the shadow node LRU: root->rnode | [0 ] | [s ] Because the parent of the deleted node is the first level below the root and has only one child in the left-most slot, the intermediate level is shrunk and the node containing the single shadow is put in its place: root->rnode | [s ] The shrinker again sees a single left-most slot in a first level node and thus decides to store the shadow in root->rnode directly and free the node - which is a leaf node on the shadow node LRU. root->rnode | s Without the update callback, the freed node remains on the shadow LRU, where it causes later shrinker runs to crash. Pass the node updater callback into __radix_tree_delete_node() in case the deletion causes the left-most branch in the tree to collapse too. Also add warnings when linked nodes are freed right away, rather than wait for the use-after-free when the list is scanned much later. Fixes: 14b468791fa9 ("mm: workingset: move shadow entry tracking to radix tree exceptional tracking") Reported-by: Dave Chinner Reported-by: Hugh Dickins Reported-by: Andrea Arcangeli Reported-and-tested-by: Dave Jones Signed-off-by: Johannes Weiner Cc: Christoph Hellwig Cc: Chris Leech Cc: Lee Duncan Cc: Jan Kara Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 4 +++- lib/radix-tree.c | 11 +++++++++-- mm/workingset.c | 3 ++- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5dea8f6440e4..52bda854593b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -306,7 +306,9 @@ void radix_tree_iter_replace(struct radix_tree_root *, void radix_tree_replace_slot(struct radix_tree_root *root, void **slot, void *item); void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node); + struct radix_tree_node *node, + radix_tree_update_node_t update_node, + void *private); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root, diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6f382e07de77..0b92d605fb69 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -640,6 +640,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, update_node(node, private); } + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); } } @@ -666,6 +667,7 @@ static void delete_node(struct radix_tree_root *root, root->rnode = NULL; } + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); node = parent; @@ -767,6 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) struct radix_tree_node *old = child; offset = child->offset + 1; child = child->parent; + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(old); if (old == entry_to_node(node)) return; @@ -1824,15 +1827,19 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); * __radix_tree_delete_node - try to free node after clearing a slot * @root: radix tree root * @node: node containing @index + * @update_node: callback for changing leaf nodes + * @private: private data to pass to @update_node * * After clearing the slot at @index in @node from radix tree * rooted at @root, call this function to attempt freeing the * node and shrinking the tree. */ void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node) + struct radix_tree_node *node, + radix_tree_update_node_t update_node, + void *private) { - delete_node(root, node, NULL, NULL); + delete_node(root, node, update_node, private); } /** diff --git a/mm/workingset.c b/mm/workingset.c index 241fa5d6b3b2..abb58ffa3c64 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -473,7 +473,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); - __radix_tree_delete_node(&mapping->page_tree, node); + __radix_tree_delete_node(&mapping->page_tree, node, + workingset_update_node, mapping); out_invalid: spin_unlock(&mapping->tree_lock); -- cgit v1.2.3-58-ga151