From cbab901296232b1247b46e6e127103d2f738d783 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: silence RCU warning in rhashtable_test. print_ht in rhashtable_test calls rht_dereference() with neither RCU protection nor the mutex. This triggers an RCU warning. So take the mutex to silence the warning. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index fb6968109113..6ca59ffcacbe 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -501,6 +501,8 @@ static unsigned int __init print_ht(struct rhltable *rhlt) unsigned int i, cnt = 0; ht = &rhlt->ht; + /* Take the mutex to avoid RCU warning */ + mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -534,6 +536,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) } } printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + mutex_unlock(&ht->mutex); return cnt; } -- cgit v1.2.3-58-ga151 From 0eb71a9da5796851fa87ddc1a534066c0fe54055 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: split rhashtable.h Due to the use of rhashtables in net namespaces, rhashtable.h is included in lots of the kernel, so a small change can require a large recompilation. This makes development painful. This patch splits out rhashtable-types.h which just includes the major type declarations, and does not include (non-trivial) inline code. rhashtable.h is no longer included by anything in the include/ directory. Common include files only include rhashtable-types.h so a large recompilation is only triggered when that changes. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. 
Miller --- MAINTAINERS | 2 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + include/linux/ipc.h | 2 +- include/linux/ipc_namespace.h | 2 +- include/linux/mroute_base.h | 2 +- include/linux/rhashtable-types.h | 139 +++++++++++++++++++++++++++++ include/linux/rhashtable.h | 127 +------------------------- include/net/inet_frag.h | 2 +- include/net/netfilter/nf_flow_table.h | 2 +- include/net/sctp/structs.h | 2 +- include/net/seg6.h | 2 +- include/net/seg6_hmac.h | 2 +- ipc/msg.c | 1 + ipc/sem.c | 1 + ipc/shm.c | 1 + ipc/util.c | 1 + lib/rhashtable.c | 1 + net/ipv4/inet_fragment.c | 1 + net/ipv4/ipmr.c | 1 + net/ipv4/ipmr_base.c | 1 + net/ipv6/ip6mr.c | 1 + net/ipv6/seg6.c | 1 + net/ipv6/seg6_hmac.c | 1 + net/netfilter/nf_tables_api.c | 1 + net/sctp/input.c | 1 + net/sctp/socket.c | 1 + 26 files changed, 166 insertions(+), 133 deletions(-) create mode 100644 include/linux/rhashtable-types.h (limited to 'lib') diff --git a/MAINTAINERS b/MAINTAINERS index edf3cf5ea691..99e5cef8172e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12162,7 +12162,9 @@ M: Herbert Xu L: netdev@vger.kernel.org S: Maintained F: lib/rhashtable.c +F: lib/test_rhashtable.c F: include/linux/rhashtable.h +F: include/linux/rhashtable-types.h RICOH R5C592 MEMORYSTICK DRIVER M: Maxim Levitsky diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0dbe2d9e22d6..1adb968b8354 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 6cc2df7f7ac9..e1c9eea6015b 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index b5630c8eb2f3..6cea726612b7 100644 --- a/include/linux/ipc_namespace.h +++ 
b/include/linux/ipc_namespace.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include struct user_namespace; diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index d633f737b3c6..fd436cdd4725 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -2,7 +2,7 @@ #define __LINUX_MROUTE_BASE_H #include -#include +#include #include #include #include diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h new file mode 100644 index 000000000000..9740063ff13b --- /dev/null +++ b/include/linux/rhashtable-types.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Simple structures that might be needed in include + * files. + */ + +#ifndef _LINUX_RHASHTABLE_TYPES_H +#define _LINUX_RHASHTABLE_TYPES_H + +#include +#include +#include +#include + +struct rhash_head { + struct rhash_head __rcu *next; +}; + +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + +struct bucket_table; + +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking + * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) + * @automatic_shrinking: Enable 
automatic shrinking of tables + * @nulls_base: Base value to generate nulls marker + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) + * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object + */ +struct rhashtable_params { + u16 nelem_hint; + u16 key_len; + u16 key_offset; + u16 head_offset; + unsigned int max_size; + u16 min_size; + bool automatic_shrinking; + u8 locks_mul; + u32 nulls_base; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @key_len: Key length for hashfn + * @max_elems: Maximum number of elements in table + * @p: Configuration parameters + * @rhlist: True if this is an rhltable + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + unsigned int key_len; + unsigned int max_elems; + struct rhashtable_params p; + bool rhlist; + struct work_struct run_work; + struct mutex mutex; + spinlock_t lock; + atomic_t nelems; +}; + +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @tbl: The table that we were walking over + */ +struct rhashtable_walker { + struct list_head list; + struct bucket_table *tbl; +}; + +/** + * struct rhashtable_iter - Hash table iterator + * @ht: Table to iterate through + * @p: Current pointer + * @list: Current hash list pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhlist_head *list; + struct 
rhashtable_walker walker; + unsigned int slot; + unsigned int skip; + bool end_of_table; +}; + +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); + +#endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 4e1f535c2034..48754ab07cdf 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * @@ -17,16 +18,14 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H -#include -#include #include #include #include #include #include -#include #include +#include /* * The end of the chain is marked with a special nulls marks which has * the following format: @@ -64,15 +63,6 @@ */ #define RHT_ELASTICITY 16u -struct rhash_head { - struct rhash_head __rcu *next; -}; - -struct rhlist_head { - struct rhash_head rhead; - struct rhlist_head __rcu *next; -}; - /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -102,114 +92,6 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -/** - * struct rhashtable_compare_arg - Key for the function rhashtable_compare - * @ht: Hash table - * @key: Key to compare against - */ -struct rhashtable_compare_arg { - struct rhashtable *ht; - const void *key; -}; - -typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); -typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, - const void *obj); - -struct rhashtable; - -/** - * struct rhashtable_params - Hash table construction parameters - * @nelem_hint: Hint on number of elements, should be 75% of desired size - * @key_len: Length of key - * @key_offset: Offset of key in struct to be hashed - * @head_offset: Offset of rhash_head in struct to 
be hashed - * @max_size: Maximum size while expanding - * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) - * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker - * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) - * @obj_hashfn: Function to hash object - * @obj_cmpfn: Function to compare key with object - */ -struct rhashtable_params { - u16 nelem_hint; - u16 key_len; - u16 key_offset; - u16 head_offset; - unsigned int max_size; - u16 min_size; - bool automatic_shrinking; - u8 locks_mul; - u32 nulls_base; - rht_hashfn_t hashfn; - rht_obj_hashfn_t obj_hashfn; - rht_obj_cmpfn_t obj_cmpfn; -}; - -/** - * struct rhashtable - Hash table handle - * @tbl: Bucket table - * @key_len: Key length for hashfn - * @max_elems: Maximum number of elements in table - * @p: Configuration parameters - * @rhlist: True if this is an rhltable - * @run_work: Deferred worker to expand/shrink asynchronously - * @mutex: Mutex to protect current/future table swapping - * @lock: Spin lock to protect walker list - * @nelems: Number of elements in table - */ -struct rhashtable { - struct bucket_table __rcu *tbl; - unsigned int key_len; - unsigned int max_elems; - struct rhashtable_params p; - bool rhlist; - struct work_struct run_work; - struct mutex mutex; - spinlock_t lock; - atomic_t nelems; -}; - -/** - * struct rhltable - Hash table with duplicate objects in a list - * @ht: Underlying rhtable - */ -struct rhltable { - struct rhashtable ht; -}; - -/** - * struct rhashtable_walker - Hash table walker - * @list: List entry on list of walkers - * @tbl: The table that we were walking over - */ -struct rhashtable_walker { - struct list_head list; - struct bucket_table *tbl; -}; - -/** - * struct rhashtable_iter - Hash table iterator - * @ht: Table to iterate through - * @p: Current pointer - * @list: Current hash list pointer - * @walker: Associated 
rhashtable walker - * @slot: Current slot - * @skip: Number of entries to skip in slot - */ -struct rhashtable_iter { - struct rhashtable *ht; - struct rhash_head *p; - struct rhlist_head *list; - struct rhashtable_walker walker; - unsigned int slot; - unsigned int skip; - bool end_of_table; -}; - static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -376,11 +258,6 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, - const struct rhashtable_params *params); -int rhltable_init(struct rhltable *hlt, - const struct rhashtable_params *params); - void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index ed07e3786d98..f4272a29dc44 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -2,7 +2,7 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include +#include struct netns_frags { /* sysctls */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index ba9fa4592f2b..0e355f4a3d76 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index dbe1b911a24d..e0f962d27386 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -48,7 +48,7 @@ #define __sctp_structs_h__ #include -#include +#include #include /* linux/in.h needs this!! */ #include /* We get struct sockaddr_in. 
*/ #include /* We get struct in6_addr */ diff --git a/include/net/seg6.h b/include/net/seg6.h index e029e301faa5..2567941a2f32 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 69c3a106056b..7fda469e2758 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define SEG6_HMAC_MAX_DIGESTSIZE 160 #define SEG6_HMAC_RING_SIZE 256 diff --git a/ipc/msg.c b/ipc/msg.c index 3b6545302598..203281198079 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/ipc/sem.c b/ipc/sem.c index 5af1943ad782..29c0347ef11d 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/ipc/shm.c b/ipc/shm.c index 051a3e1fb8df..d4daf78df6da 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include diff --git a/ipc/util.c b/ipc/util.c index 4e81182fa0ac..fdffff41f65b 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -63,6 +63,7 @@ #include #include #include +#include #include diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9427b5766134..c9fafea7dc6e 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -28,6 +28,7 @@ #include #include #include +#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9e35b81d093..316518f87294 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9f79b9803a16..82f914122f1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include #include 
#include +#include #include #include #include diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index cafb0506c8c9..1ad9aa62a97b 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -2,6 +2,7 @@ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation */ +#include #include /* Sets everything common except 'dev', since that is done under locking */ diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0d0f0053bb11..d0b7e0249c13 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 0fdf2a55e746..8d0ba757a46c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 33fb35cbfac1..b1791129a875 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 896d4a36081d..3f211e1025c1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/input.c b/net/sctp/input.c index ba8a6e6c36fa..9bbc5f92c941 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -56,6 +56,7 @@ #include #include #include +#include /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d20f7addee19..0e91e83eea5a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3-58-ga151 From 9f9a707738aa7a8b9f78a641b83927ada256a626 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: remove nulls_base and related code. 
This "feature" is unused, undocumented, and untested and so doesn't really belong. A patch is under development to properly implement support for detecting when a search gets diverted down a different chain, which is the common purpose of nulls markers. This patch actually fixes a bug too. The table resizing allows a table to grow to 2^31 buckets, but the hash is truncated to 27 bits - any growth beyond 2^27 is wasteful and ineffective. This patch results in NULLS_MARKER(0) being used for all chains, and leaves the use of rht_is_a_nulls() to test for it. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable-types.h | 2 -- include/linux/rhashtable.h | 33 +++------------------------------ lib/rhashtable.c | 8 -------- lib/test_rhashtable.c | 5 +---- net/core/xdp.c | 4 ++-- 5 files changed, 6 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 9740063ff13b..763d613ce2c2 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -50,7 +50,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, * @min_size: Minimum size while shrinking * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object @@ -64,7 +63,6 @@ struct rhashtable_params { u16 min_size; bool automatic_shrinking; u8 locks_mul; - u32 nulls_base; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; rht_obj_cmpfn_t obj_cmpfn; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 48754ab07cdf..d9f719af7936 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -28,25 +28,8 @@ #include /* * The end of the chain is 
marked with a special nulls marks which has - * the following format: - * - * +-------+-----------------------------------------------------+-+ - * | Base | Hash |1| - * +-------+-----------------------------------------------------+-+ - * - * Base (4 bits) : Reserved to distinguish between multiple tables. - * Specified via &struct rhashtable_params.nulls_base. - * Hash (27 bits): Full hash (unmasked) of first element added to bucket - * 1 (1 bit) : Nulls marker (always set) - * - * The remaining bits of the next pointer remain unused for now. + * the least significant bit set. */ -#define RHT_BASE_BITS 4 -#define RHT_HASH_BITS 27 -#define RHT_BASE_SHIFT RHT_HASH_BITS - -/* Base bits plus 1 bit for nulls marker */ -#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) /* Maximum chain length before rehash * @@ -92,24 +75,14 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) -{ - return NULLS_MARKER(ht->p.nulls_base + hash); -} - #define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ - ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) + ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } -static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) -{ - return ((unsigned long) ptr) >> 1; -} - static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { @@ -119,7 +92,7 @@ static inline void *rht_obj(const struct rhashtable *ht, static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { - return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); + return hash & (tbl->size - 1); } static inline unsigned int rht_key_get_hash(struct rhashtable *ht, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c9fafea7dc6e..688693c919be 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -995,7 +995,6 
@@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, - * .nulls_base = (1U << RHT_BASE_SHIFT), * }; * * Configuration Example 2: Variable length keys @@ -1029,9 +1028,6 @@ int rhashtable_init(struct rhashtable *ht, (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; - if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) - return -EINVAL; - memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); @@ -1096,10 +1092,6 @@ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) { int err; - /* No rhlist NULLs marking for now. */ - if (params->nulls_base) - return -EINVAL; - err = rhashtable_init(&hlt->ht, params); hlt->ht.rhlist = true; return err; diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 6ca59ffcacbe..82ac39ce5310 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -83,7 +83,7 @@ static u32 my_hashfn(const void *data, u32 len, u32 seed) { const struct test_obj_rhl *obj = data; - return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; + return (obj->value.id % 10); } static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) @@ -99,7 +99,6 @@ static struct rhashtable_params test_rht_params = { .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(struct test_obj_val), .hashfn = jhash, - .nulls_base = (3U << RHT_BASE_SHIFT), }; static struct rhashtable_params test_rht_params_dup = { @@ -296,8 +295,6 @@ static int __init test_rhltable(unsigned int entries) if (!obj_in_table) goto out_free; - /* nulls_base not supported in rhlist interface */ - test_rht_params.nulls_base = 0; err = rhltable_init(&rhlt, &test_rht_params); if (WARN_ON(err)) goto out_free; diff --git a/net/core/xdp.c b/net/core/xdp.c index 9d1f22072d5d..31c58719b5a9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -45,8 +45,8 @@ static u32 xdp_mem_id_hashfn(const 
void *data, u32 len, u32 seed) BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) != sizeof(u32)); - /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */ - return key << RHT_HASH_RESERVED_SPACE; + /* Use cyclic increasing ID as direct hash key */ + return key; } static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg, -- cgit v1.2.3-58-ga151 From 9b4f64a227b6f462482a8cc68c7134dc6e26f1c1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: simplify INIT_RHT_NULLS_HEAD() The 'ht' and 'hash' arguments to INIT_RHT_NULLS_HEAD() are no longer used - so drop them. This allows us to also remove the nhash argument from nested_table_alloc(). Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- lib/rhashtable.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d9f719af7936..3f3a182bd0b4 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -75,7 +75,7 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ +#define INIT_RHT_NULLS_HEAD(ptr) \ ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 688693c919be..a81cd27d518c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -116,8 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head) static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, - unsigned int shifted, - unsigned int nhash) + unsigned int shifted) { union nested_table *ntbl; int i; @@ -130,8 +129,7 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, if (ntbl && shifted) { for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) 
- INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht, - (i << shifted) | nhash); + INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } rcu_assign_pointer(*prev, ntbl); @@ -157,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, - 0, 0)) { + 0)) { kfree(tbl); return NULL; } @@ -207,7 +205,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->hash_rnd = get_random_u32(); for (i = 0; i < nbuckets; i++) - INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); + INIT_RHT_NULLS_HEAD(tbl->buckets[i]); return tbl; } @@ -1217,7 +1215,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, nhash = index; shifted = tbl->nest; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, nhash); + size <= (1 << shift) ? shifted : 0); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); @@ -1226,8 +1224,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, nhash |= index << shifted; shifted += shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, - nhash); + size <= (1 << shift) ? shifted : 0); } if (!ntbl) -- cgit v1.2.3-58-ga151 From 5af68ef7333c8606bfe6e400cb962081518c3acb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: simplify nested_table_alloc() and rht_bucket_nested_insert() Now that we don't use the hash value or shift in nested_table_alloc() there is room for simplification. We only need to pass a "is this a leaf" flag to nested_table_alloc(), and don't need to track as much information in rht_bucket_nested_insert(). Note there is another minor cleanup in nested_table_alloc() here. 
The number of elements in a page of "union nested_tables" is most naturally PAGE_SIZE / sizeof(ntbl[0]) The previous code had PAGE_SIZE / sizeof(ntbl[0].bucket) which happens to be the correct value only because the bucket uses all the space in the union. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a81cd27d518c..2aa41c15df17 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -116,7 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head) static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, - unsigned int shifted) + bool leaf) { union nested_table *ntbl; int i; @@ -127,8 +127,8 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); - if (ntbl && shifted) { - for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) + if (ntbl && leaf) { + for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } @@ -155,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, - 0)) { + false)) { kfree(tbl); return NULL; } @@ -1207,24 +1207,18 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; union nested_table *ntbl; - unsigned int shifted; - unsigned int nhash; ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); hash >>= tbl->nest; - nhash = index; - shifted = tbl->nest; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? 
shifted : 0); + size <= (1 << shift)); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); size >>= shift; hash >>= shift; - nhash |= index << shifted; - shifted += shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0); + size <= (1 << shift)); } if (!ntbl) -- cgit v1.2.3-58-ga151 From 0ad66449aa3cbaedbdeaf55bffce74084bb7e9f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: use cmpxchg() to protect ->future_tbl. Rather than borrowing one of the bucket locks to protect ->future_tbl updates, use cmpxchg(). This gives more freedom to change how bucket locking is implemented. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 2aa41c15df17..52ec83212856 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -297,21 +297,14 @@ static int rhashtable_rehash_attach(struct rhashtable *ht, struct bucket_table *old_tbl, struct bucket_table *new_tbl) { - /* Protect future_tbl using the first bucket lock. */ - spin_lock_bh(old_tbl->locks); - - /* Did somebody beat us to it? */ - if (rcu_access_pointer(old_tbl->future_tbl)) { - spin_unlock_bh(old_tbl->locks); - return -EEXIST; - } - /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. + * As cmpxchg() provides strong barriers, we do not need + * rcu_assign_pointer(). */ - rcu_assign_pointer(old_tbl->future_tbl, new_tbl); - spin_unlock_bh(old_tbl->locks); + if (cmpxchg(&old_tbl->future_tbl, NULL, new_tbl) != NULL) + return -EEXIST; return 0; } -- cgit v1.2.3-58-ga151 From c0690016a73fe6bd456887bbbe6e10c7f0096554 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: clean up dereference of ->future_tbl. 
Using rht_dereference_bucket() to dereference ->future_tbl looks like a type error, and could be confusing. Using rht_dereference_rcu() to test a pointer for NULL adds an unnecessary barrier - rcu_access_pointer() is preferred for NULL tests when no lock is held. This uses 3 different ways to access ->future_tbl. - if we know the mutex is held, use rht_dereference() - if we don't hold the mutex, and are only testing for NULL, use rcu_access_pointer() - otherwise (using RCU protection for true dereference), use rht_dereference_rcu(). Note that this includes a simplification of the call to rhashtable_last_table() - we don't do an extra dereference before the call any more. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- lib/rhashtable.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3f3a182bd0b4..eb7111039247 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -595,7 +595,7 @@ static inline void *__rhashtable_insert_fast( lock = rht_bucket_lock(tbl, hash); spin_lock_bh(lock); - if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { + if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: spin_unlock_bh(lock); rcu_read_unlock(); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 52ec83212856..0e04947b7e0c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -226,8 +226,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct bucket_table *new_tbl = rhashtable_last_table(ht, - rht_dereference_rcu(old_tbl->future_tbl, ht)); + struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); int err = 
-EAGAIN; struct rhash_head *head, *next, *entry; @@ -467,7 +466,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, fail: /* Do not fail the insert if someone else did a rehash. */ - if (likely(rcu_dereference_raw(tbl->future_tbl))) + if (likely(rcu_access_pointer(tbl->future_tbl))) return 0; /* Schedule async rehash to retry allocation in process context. */ @@ -540,7 +539,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) return ERR_CAST(data); - new_tbl = rcu_dereference(tbl->future_tbl); + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (new_tbl) return new_tbl; @@ -599,7 +598,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, break; spin_unlock_bh(lock); - tbl = rcu_dereference(tbl->future_tbl); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); } data = rhashtable_lookup_one(ht, tbl, hash, key, obj); -- cgit v1.2.3-58-ga151 From 0e2dc70e3d0d503b0cc9c5f74db3eb6db52c9e22 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jun 2018 08:58:30 +0200 Subject: bitfield: add tests Add tests for the bitfield helpers. The constant ones will all be folded to nothing by the compiler (if everything is correct in the header file), and the variable ones do some tests against open-coding the necessary shifts. A few test cases that should fail/warn compilation are provided under ifdef. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- lib/Kconfig.debug | 7 +++ lib/Makefile | 1 + lib/test_bitfield.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 lib/test_bitfield.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8838d1158d19..d3d82eccdfa5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1802,6 +1802,13 @@ config TEST_BITMAP If unsure, say N. 
+config TEST_BITFIELD + tristate "Test bitfield functions at runtime" + help + Enable this option to test the bitfield functions at boot. + + If unsure, say N. + config TEST_UUID tristate "Test functions located in the uuid module at runtime" diff --git a/lib/Makefile b/lib/Makefile index 956b320292fe..701717a23d32 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o diff --git a/lib/test_bitfield.c b/lib/test_bitfield.c new file mode 100644 index 000000000000..5b8f4108662d --- /dev/null +++ b/lib/test_bitfield.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#define CHECK_ENC_GET_U(tp, v, field, res) do { \ + { \ + u##tp _res; \ + \ + _res = u##tp##_encode_bits(v, field); \ + if (_res != res) { \ + pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\ + (u64)_res); \ + return -EINVAL; \ + } \ + if (u##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET_LE(tp, v, field, res) do { \ + { \ + __le##tp _res; \ + \ + _res = le##tp##_encode_bits(v, field); \ + if (_res != cpu_to_le##tp(res)) { \ + pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ + (u64)le##tp##_to_cpu(_res), \ + (u64)(res)); \ + return -EINVAL; \ + } \ + if (le##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET_BE(tp, v, field, res) do { \ + { \ + __be##tp _res; \ + \ + _res = be##tp##_encode_bits(v, field); \ + if (_res != cpu_to_be##tp(res)) { \ + 
pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ + (u64)be##tp##_to_cpu(_res), \ + (u64)(res)); \ + return -EINVAL; \ + } \ + if (be##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET(tp, v, field, res) do { \ + CHECK_ENC_GET_U(tp, v, field, res); \ + CHECK_ENC_GET_LE(tp, v, field, res); \ + CHECK_ENC_GET_BE(tp, v, field, res); \ + } while (0) + +static int test_constants(void) +{ + /* + * NOTE + * This whole function compiles (or at least should, if everything + * is going according to plan) to nothing after optimisation. + */ + + CHECK_ENC_GET(16, 1, 0x000f, 0x0001); + CHECK_ENC_GET(16, 3, 0x00f0, 0x0030); + CHECK_ENC_GET(16, 5, 0x0f00, 0x0500); + CHECK_ENC_GET(16, 7, 0xf000, 0x7000); + CHECK_ENC_GET(16, 14, 0x000f, 0x000e); + CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0); + + CHECK_ENC_GET_U(8, 1, 0x0f, 0x01); + CHECK_ENC_GET_U(8, 3, 0xf0, 0x30); + CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e); + CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0); + + CHECK_ENC_GET(32, 1, 0x00000f00, 0x00000100); + CHECK_ENC_GET(32, 3, 0x0000f000, 0x00003000); + CHECK_ENC_GET(32, 5, 0x000f0000, 0x00050000); + CHECK_ENC_GET(32, 7, 0x00f00000, 0x00700000); + CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000); + CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000); + + CHECK_ENC_GET(64, 1, 0x00000f0000000000ull, 0x0000010000000000ull); + CHECK_ENC_GET(64, 3, 0x0000f00000000000ull, 0x0000300000000000ull); + CHECK_ENC_GET(64, 5, 0x000f000000000000ull, 0x0005000000000000ull); + CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull); + CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull); + CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull); + + return 0; +} + +#define CHECK(tp, mask) do { \ + u64 v; \ + \ + for (v = 0; v < 1 << hweight32(mask); v++) \ + if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \ + return -EINVAL; \ + } while (0) + +static int test_variables(void) +{ + CHECK(u8, 0x0f); + CHECK(u8, 
0xf0); + CHECK(u8, 0x38); + + CHECK(u16, 0x0038); + CHECK(u16, 0x0380); + CHECK(u16, 0x3800); + CHECK(u16, 0x8000); + + CHECK(u32, 0x80000000); + CHECK(u32, 0x7f000000); + CHECK(u32, 0x07e00000); + CHECK(u32, 0x00018000); + + CHECK(u64, 0x8000000000000000ull); + CHECK(u64, 0x7f00000000000000ull); + CHECK(u64, 0x0001800000000000ull); + CHECK(u64, 0x0000000080000000ull); + CHECK(u64, 0x000000007f000000ull); + CHECK(u64, 0x0000000018000000ull); + CHECK(u64, 0x0000001f8000000ull); + + return 0; +} + +static int __init test_bitfields(void) +{ + int ret = test_constants(); + + if (ret) { + pr_warn("constant tests failed!\n"); + return ret; + } + + ret = test_variables(); + if (ret) { + pr_warn("variable tests failed!\n"); + return ret; + } + +#ifdef TEST_BITFIELD_COMPILE + /* these should fail compilation */ + CHECK_ENC_GET(16, 16, 0x0f00, 0x1000); + u32_encode_bits(7, 0x06000000); + + /* this should at least give a warning */ + u16_encode_bits(0, 0x60000); +#endif + + pr_info("tests passed\n"); + + return 0; +} +module_init(test_bitfields) + +MODULE_AUTHOR("Johannes Berg "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-58-ga151 From 7861552cedd81a164c0d5d1c89fe2cb45a3ed41b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Jun 2018 12:39:18 -0700 Subject: netlink: Return extack message if attribute validation fails Have one extack message for parsing and validating. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- lib/nlattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index dfa55c873c13..e335bcafa9e4 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -253,8 +253,8 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) { - if (extack) - extack->bad_attr = nla; + NL_SET_ERR_MSG_ATTR(extack, nla, + "Attribute failed policy validation"); goto errout; } } -- cgit v1.2.3-58-ga151 From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:18 -0700 Subject: lib: reciprocal_div: implement the improved algorithm on the paper mentioned The newly added "reciprocal_value_adv" implements the advanced version of the algorithm described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then requires u128 divide on host. The exception case could be easily handled before calling "reciprocal_value_adv". The advanced version requires more complex calculation to get the reciprocal multiplier and other control variables, but then could reduce the required emulation operations. It makes no sense to use this advanced version for host divide emulation, those extra complexities for calculating multiplier etc could completely negate our saving on emulation operations. However, it makes sense to use it for JIT divide code generation (for example eBPF JIT backends) for which we are willing to trade performance of JITed code with that of host. As shown by the following pseudo code, the required emulation operations could go down from 6 (the basic version) to 3 or 4. To use the result of "reciprocal_value_adv", suppose we want to calculate n/d, the C-style pseudo code will be the following, it could be easily changed to real code generation for other JIT targets.
struct reciprocal_value_adv rvalue; u8 pre_shift, exp; // handle exception case. if (d >= (1U << 31)) { result = n >= d; return; } rvalue = reciprocal_value_adv(d, 32); exp = rvalue.exp; if (rvalue.is_wide_m && !(d & 1)) { // floor(log2(d & (2^32 -d))) pre_shift = fls(d & -d) - 1; rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); } else { pre_shift = 0; } // code generation starts. if (d == 1U << exp) { result = n >> exp; } else if (rvalue.is_wide_m) { // pre_shift must be zero when reached here. t = (n * rvalue.m) >> 32; result = n - t; result >>= 1; result += t; result >>= rvalue.sh - 1; } else { if (pre_shift) result = n >> pre_shift; result = ((u64)result * rvalue.m) >> 32; result >>= rvalue.sh; } Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++++++++++ lib/reciprocal_div.c | 41 +++++++++++++++++++++++++ 2 files changed, 109 insertions(+) (limited to 'lib') diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index e031e9f2f9d8..585ce89c0f33 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -25,6 +25,9 @@ struct reciprocal_value { u8 sh1, sh2; }; +/* "reciprocal_value" and "reciprocal_divide" together implement the basic + * version of the algorithm described in Figure 4.1 of the paper. + */ struct reciprocal_value reciprocal_value(u32 d); static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) @@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) return (t + ((a - t) >> R.sh1)) >> R.sh2; } +struct reciprocal_value_adv { + u32 m; + u8 sh, exp; + bool is_wide_m; +}; + +/* "reciprocal_value_adv" implements the advanced version of the algorithm + * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose + * ceil(log2(d)) result will be 32 which then requires u128 divide on host.
The + * exception case could be easily handled before calling "reciprocal_value_adv". + * + * The advanced version requires more complex calculation to get the reciprocal + * multiplier and other control variables, but then could reduce the required + * emulation operations. + * + * It makes no sense to use this advanced version for host divide emulation, + * those extra complexities for calculating multiplier etc could completely + * negate our saving on emulation operations. + * + * However, it makes sense to use it for JIT divide code generation for which + * we are willing to trade performance of JITed code with that of host. As shown + * by the following pseudo code, the required emulation operations could go down + * from 6 (the basic version) to 3 or 4. + * + * To use the result of "reciprocal_value_adv", suppose we want to calculate + * n/d, the pseudo C code will be: + * + * struct reciprocal_value_adv rvalue; + * u8 pre_shift, exp; + * + * // handle exception case. + * if (d >= (1U << 31)) { + * result = n >= d; + * return; + * } + * + * rvalue = reciprocal_value_adv(d, 32); + * exp = rvalue.exp; + * if (rvalue.is_wide_m && !(d & 1)) { + * // floor(log2(d & (2^32 -d))) + * pre_shift = fls(d & -d) - 1; + * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); + * } else { + * pre_shift = 0; + * } + * + * // code generation starts. + * if (d == 1U << exp) { + * result = n >> exp; + * } else if (rvalue.is_wide_m) { + * // pre_shift must be zero when reached here.
+ * t = (n * rvalue.m) >> 32; + * result = n - t; + * result >>= 1; + * result += t; + * result >>= rvalue.sh - 1; + * } else { + * if (pre_shift) + * result = n >> pre_shift; + * result = ((u64)result * rvalue.m) >> 32; + * result >>= rvalue.sh; + * } + */ +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); + #endif /* _LINUX_RECIPROCAL_DIV_H */ diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index fcb4ce682c6f..bf043258fa00 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d) return R; } EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. + */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); -- cgit v1.2.3-58-ga151 From 5f81880d5204ee2388fd9a75bb850ccd526885b7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:48 +0000 Subject: sysfs, kobject: allow creating kobject belonging to arbitrary users Normally kobjects and their sysfs representation belong to global root, however it is not necessarily the case for objects in separate namespaces. 
For example, objects in separate network namespace logically belong to the container's root and not global root. This change lays groundwork for allowing network namespace objects ownership to be transferred to container's root user by defining get_ownership() callback in ktype structure and using it in sysfs code to retrieve desired uid/gid when creating sysfs objects for given kobject. Co-Developed-by: Tyler Hicks Signed-off-by: Dmitry Torokhov Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller --- fs/sysfs/dir.c | 7 +++++-- fs/sysfs/file.c | 32 ++++++++++++++++++++------------ fs/sysfs/group.c | 23 +++++++++++++++++------ fs/sysfs/sysfs.h | 5 ++--- include/linux/kobject.h | 4 ++++ lib/kobject.c | 19 +++++++++++++++++++ 6 files changed, 67 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e39b884f0867..feeae8081c22 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { struct kernfs_node *parent, *kn; + kuid_t uid; + kgid_t gid; BUG_ON(!kobj); @@ -51,9 +53,10 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) if (!parent) return -ENOENT; + kobject_get_ownership(kobj, &uid, &gid); + kn = kernfs_create_dir_ns(parent, kobject_name(kobj), - S_IRWXU | S_IRUGO | S_IXUGO, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 513fa691ecbd..fa46216523cf 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, bool is_bin, - umode_t mode, const void *ns) + umode_t mode, kuid_t uid, kgid_t gid, const void *ns) { struct lock_class_key *key = NULL; const struct kernfs_ops *ops; @@ 
-302,8 +302,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif - kn = __kernfs_create_file(parent, attr->name, - mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + + kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) @@ -313,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, return 0; } -int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr, - bool is_bin) -{ - return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL); -} - /** * sysfs_create_file_ns - create an attribute file for an object with custom ns * @kobj: object we're creating for @@ -328,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr, int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { + kuid_t uid; + kgid_t gid; + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns); + kobject_get_ownership(kobj, &uid, &gid); + return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, + uid, gid, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); @@ -359,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; + kuid_t uid; + kgid_t gid; int error; if (group) { @@ -371,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj, if (!parent) return -ENOENT; - error = sysfs_add_file(parent, attr, false); + kobject_get_ownership(kobj, &uid, &gid); + error = sysfs_add_file_mode_ns(kobj->sd, attr, false, + attr->mode, uid, gid, NULL); kernfs_put(parent); return error; @@ -487,9 +490,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { + kuid_t uid; + kgid_t gid; + 
BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->sd, &attr->attr, true); + kobject_get_ownership(kobj, &uid, &gid); + return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true, + attr->attr.mode, uid, gid, NULL); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 4802ec0e1e3a..c7a716c4acc9 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent, } static int create_files(struct kernfs_node *parent, struct kobject *kobj, + kuid_t uid, kgid_t gid, const struct attribute_group *grp, int update) { struct attribute *const *attr; @@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, false, - mode, NULL); + mode, uid, gid, NULL); if (unlikely(error)) break; } @@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, - mode, NULL); + mode, + uid, gid, NULL); if (error) break; } @@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update, const struct attribute_group *grp) { struct kernfs_node *kn; + kuid_t uid; + kgid_t gid; int error; BUG_ON(!kobj || (!update && !kobj->sd)); @@ -118,9 +122,11 @@ static int internal_create_group(struct kobject *kobj, int update, kobj->name, grp->name ?: ""); return -EINVAL; } + kobject_get_ownership(kobj, &uid, &gid); if (grp->name) { - kn = kernfs_create_dir(kobj->sd, grp->name, - S_IRWXU | S_IRUGO | S_IXUGO, kobj); + kn = kernfs_create_dir_ns(kobj->sd, grp->name, + S_IRWXU | S_IRUGO | S_IXUGO, + uid, gid, kobj, NULL); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(kobj->sd, grp->name); @@ -129,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update, } else kn = kobj->sd; kernfs_get(kn); - error = 
create_files(kn, kobj, grp, update); + error = create_files(kn, kobj, uid, gid, grp, update); if (error) { if (grp->name) kernfs_remove(kn); @@ -281,6 +287,8 @@ int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; + kuid_t uid; + kgid_t gid; int error = 0; struct attribute *const *attr; int i; @@ -289,8 +297,11 @@ int sysfs_merge_group(struct kobject *kobj, if (!parent) return -ENOENT; + kobject_get_ownership(kobj, &uid, &gid); + for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) - error = sysfs_add_file(parent, *attr, false); + error = sysfs_add_file_mode_ns(parent, *attr, false, + (*attr)->mode, uid, gid, NULL); if (error) { while (--i >= 0) kernfs_remove_by_name(parent, (*--attr)->name); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index d098e015fcc9..0050cc0c0236 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name); /* * file.c */ -int sysfs_add_file(struct kernfs_node *parent, - const struct attribute *attr, bool is_bin); int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, bool is_bin, - umode_t amode, const void *ns); + umode_t amode, kuid_t uid, kgid_t gid, + const void *ns); /* * symlink.c diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 7f6f93c3df9c..b49ff230beba 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -26,6 +26,7 @@ #include #include #include +#include #define UEVENT_HELPER_PATH_LEN 256 #define UEVENT_NUM_ENVP 32 /* number of env pointers */ @@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero( extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); +extern void kobject_get_ownership(struct kobject *kobj, + kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { @@ -122,6 +125,7 @@ 
struct kobj_type { struct attribute **default_attrs; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); + void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; struct kobj_uevent_env { diff --git a/lib/kobject.c b/lib/kobject.c index 18989b5b3b56..f2dc1f756007 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj) return kobj->ktype->namespace(kobj); } +/** + * kobject_get_ownership - get sysfs ownership data for @kobj + * @kobj: kobject in question + * @uid: kernel user ID for sysfs objects + * @gid: kernel group ID for sysfs objects + * + * Returns initial uid/gid pair that should be used when creating sysfs + * representation of given kobject. Normally used to adjust ownership of + * objects in a container. + */ +void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + *uid = GLOBAL_ROOT_UID; + *gid = GLOBAL_ROOT_GID; + + if (kobj->ktype->get_ownership) + kobj->ktype->get_ownership(kobj, uid, gid); +} + /* * populate_dir - populate directory with attributes. * @kobj: object we're working on. -- cgit v1.2.3-58-ga151 From d028b6f703209dbe96201b2714ff46625877128e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:49 +0000 Subject: kobject: kset_create_and_add() - fetch ownership info from parent This change implements get_ownership() for ksets created with kset_create_and_add() call by fetching ownership data from parent kobject. This is done mostly for benefit of "queues" attribute of net devices so that corresponding directory belongs to container's root instead of global root for network devices in a container. Signed-off-by: Dmitry Torokhov Reviewed-by: Tyler Hicks Signed-off-by: David S. 
Miller --- lib/kobject.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index f2dc1f756007..389829d3a1d1 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -887,9 +887,16 @@ static void kset_release(struct kobject *kobj) kfree(kset); } +void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + if (kobj->parent) + kobject_get_ownership(kobj->parent, uid, gid); +} + static struct kobj_type kset_ktype = { .sysfs_ops = &kobj_sysfs_ops, - .release = kset_release, + .release = kset_release, + .get_ownership = kset_get_ownership, }; /** -- cgit v1.2.3-58-ga151