author     Vlastimil Babka <vbabka@suse.cz>   2024-07-15 10:44:16 +0200
committer  Vlastimil Babka <vbabka@suse.cz>   2024-07-15 10:44:16 +0200
commit     436381eaf2a423e60fc8340399f7d2458091b383
tree       16ecce9d2dcb5ed43de60a966e9026c9ffd6e711
parent     a52c6330ff2fe1163333fa6609bdc6e8763ec286
parent     d73778e4b86755d527a0c6b249cde846770b2f66
Merge branch 'slab/for-6.11/buckets' into slab/for-next
Merge all the slab patches previously collected on top of v6.10-rc1, over cleanups/fixes that had to be based on rc6.
-rw-r--r--  Documentation/core-api/memory-allocation.rst |   6
-rw-r--r--  include/linux/mm.h                            |   6
-rw-r--r--  include/linux/poison.h                        |   7
-rw-r--r--  include/linux/slab.h                          |  97
-rw-r--r--  ipc/msgutil.c                                 |  13
-rw-r--r--  kernel/configs/hardening.config               |   1
-rw-r--r--  lib/fortify_kunit.c                           |   2
-rw-r--r--  lib/slub_kunit.c                              |   2
-rw-r--r--  mm/Kconfig                                    |  17
-rw-r--r--  mm/slab.h                                     |  10
-rw-r--r--  mm/slab_common.c                              | 111
-rw-r--r--  mm/slub.c                                     | 129
-rw-r--r--  mm/util.c                                     |  23
-rw-r--r--  rust/kernel/alloc/allocator.rs                |  19
-rwxr-xr-x  scripts/kernel-doc                            |   1
-rw-r--r--  tools/include/linux/poison.h                  |   7
16 files changed, 315 insertions(+), 136 deletions(-)
diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst
index 1c58d883b273..8b84eb4bdae7 100644
--- a/Documentation/core-api/memory-allocation.rst
+++ b/Documentation/core-api/memory-allocation.rst
@@ -144,8 +144,10 @@ configuration, but it is a good practice to use `kmalloc` for objects
smaller than page size.
The address of a chunk allocated with `kmalloc` is aligned to at least
-ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
-alignment is also guaranteed to be at least the respective size.
+ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
+alignment is also guaranteed to be at least the respective size. For other
+sizes, the alignment is guaranteed to be at least the largest power-of-two
+divisor of the size.
Chunks allocated with kmalloc() can be resized with krealloc(). Similarly
to kmalloc_array(): a helper for resizing arrays is provided in the form of
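
As a quick illustration of the strengthened guarantee documented above (a hypothetical sketch, not part of this series; the function name is made up): a non-power-of-two size such as 96 = 32 * 3 is expected to come back aligned to at least 32 bytes.

#include <linux/kernel.h>
#include <linux/slab.h>

/* Illustrative only: the largest power-of-two divisor of 96 is 32, so the
 * returned pointer should be at least 32-byte aligned (and never less than
 * ARCH_KMALLOC_MINALIGN). */
static void kmalloc_alignment_demo(void)
{
	void *p = kmalloc(96, GFP_KERNEL);

	if (p) {
		WARN_ON(!IS_ALIGNED((unsigned long)p, 32));
		kfree(p);
	}
}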
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb7c96d24ac0..836ccb1f95a8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1110,7 +1110,7 @@ static inline unsigned int compound_order(struct page *page)
*
* Return: The order of the folio.
*/
-static inline unsigned int folio_order(struct folio *folio)
+static inline unsigned int folio_order(const struct folio *folio)
{
if (!folio_test_large(folio))
return 0;
@@ -2150,7 +2150,7 @@ static inline struct folio *folio_next(struct folio *folio)
* it from being split. It is not necessary for the folio to be locked.
* Return: The base-2 logarithm of the size of this folio.
*/
-static inline unsigned int folio_shift(struct folio *folio)
+static inline unsigned int folio_shift(const struct folio *folio)
{
return PAGE_SHIFT + folio_order(folio);
}
@@ -2163,7 +2163,7 @@ static inline unsigned int folio_shift(struct folio *folio)
* it from being split. It is not necessary for the folio to be locked.
* Return: The number of bytes in this folio.
*/
-static inline size_t folio_size(struct folio *folio)
+static inline size_t folio_size(const struct folio *folio)
{
return PAGE_SIZE << folio_order(folio);
}
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 1f0ee2459f2a..9c1a035af97c 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -38,11 +38,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
-#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
-#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
-
-#define SLUB_RED_INACTIVE 0xbb
-#define SLUB_RED_ACTIVE 0xcc
+#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
+#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7247e217e21b..d99afce36098 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -426,8 +426,9 @@ enum kmalloc_cache_type {
NR_KMALLOC_TYPES
};
-extern struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
+
+extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
/*
* Define gfp bits that should not be set for KMALLOC_NORMAL.
@@ -528,9 +529,6 @@ static_assert(PAGE_SHIFT <= 20);
#include <linux/alloc_tag.h>
-void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
-#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__))
-
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
@@ -551,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
void kmem_cache_free(struct kmem_cache *s, void *objp);
+kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
+ unsigned int useroffset, unsigned int usersize,
+ void (*ctor)(void *));
+
/*
* Bulk allocation and freeing operations. These are accelerated in an
* allocator specific way to avoid taking locks repeatedly or building
@@ -568,31 +570,49 @@ static __always_inline void kfree_bulk(size_t size, void **p)
kmem_cache_free_bulk(NULL, size, p);
}
-void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
- __alloc_size(1);
-#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))
-
void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
-void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
- __assume_kmalloc_alignment __alloc_size(3);
+/*
+ * These macros allow declaring a kmem_buckets * parameter alongside size, which
+ * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
+ * sites don't have to pass NULL.
+ */
+#ifdef CONFIG_SLAB_BUCKETS
+#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b)
+#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b)
+#define PASS_BUCKET_PARAM(_b) (_b)
+#else
+#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size)
+#define PASS_BUCKET_PARAMS(_size, _b) (_size)
+#define PASS_BUCKET_PARAM(_b) NULL
+#endif
+
+/*
+ * The following functions are not to be used directly and are intended only
+ * for internal use from kmalloc() and kmalloc_node()
+ * with the exception of kunit tests
+ */
+
+void *__kmalloc_noprof(size_t size, gfp_t flags)
+ __assume_kmalloc_alignment __alloc_size(1);
-void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size) __assume_kmalloc_alignment
- __alloc_size(4);
-#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
+void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+ __assume_kmalloc_alignment __alloc_size(1);
-#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
+void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
+ __assume_kmalloc_alignment __alloc_size(3);
-void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
- __alloc_size(1);
-#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
+void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
+ int node, size_t size)
+ __assume_kmalloc_alignment __alloc_size(4);
-void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
- __alloc_size(1);
-#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
+void *__kmalloc_large_noprof(size_t size, gfp_t flags)
+ __assume_page_alignment __alloc_size(1);
+
+void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
+ __assume_page_alignment __alloc_size(1);
/**
* kmalloc - allocate kernel memory
@@ -604,7 +624,8 @@ void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_pag
*
* The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
* bytes. For @size of power of two bytes, the alignment is also guaranteed
- * to be at least to the size.
+ * to be at least to the size. For other sizes, the alignment is guaranteed to
+ * be at least the largest power-of-two divisor of @size.
*
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp_types.h and described at
@@ -654,10 +675,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
- return kmalloc_large_noprof(size, flags);
+ return __kmalloc_large_noprof(size, flags);
index = kmalloc_index(size);
- return kmalloc_trace_noprof(
+ return __kmalloc_cache_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
@@ -665,20 +686,26 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
+#define kmem_buckets_alloc(_b, _size, _flags) \
+ alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
+
+#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \
+ alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
+
static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
- return kmalloc_large_node_noprof(size, flags, node);
+ return __kmalloc_large_node_noprof(size, flags, node);
index = kmalloc_index(size);
- return kmalloc_node_trace_noprof(
+ return __kmalloc_cache_node_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
- return __kmalloc_node_noprof(size, flags, node);
+ return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
}
#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
@@ -729,8 +756,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi
*/
#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO)
-void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
- unsigned long caller) __alloc_size(1);
+void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
+ unsigned long caller) __alloc_size(1);
+#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
+ __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
#define kmalloc_node_track_caller(...) \
alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
@@ -756,7 +785,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_
return NULL;
if (__builtin_constant_p(n) && __builtin_constant_p(size))
return kmalloc_node_noprof(bytes, flags, node);
- return __kmalloc_node_noprof(bytes, flags, node);
+ return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
}
#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
@@ -780,7 +809,9 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
-extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node_noprof(size, flags, node) \
+ __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE)
@@ -788,6 +819,8 @@ extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_si
#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO)
#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
+#define kmem_buckets_valloc(_b, _size, _flags) \
+ alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
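
To see how the new slab.h declarations are meant to be consumed, here is a minimal hypothetical user of kmem_buckets_create() and kmem_buckets_alloc(), mirroring the ipc/msgutil.c hunk below; the foo_* names are illustrative, not from this series.

#include <linux/init.h>
#include <linux/limits.h>
#include <linux/slab.h>

static kmem_buckets *foo_buckets __ro_after_init;

static int __init foo_buckets_init(void)
{
	/* One dedicated set of kmalloc-style caches for this subsystem. */
	foo_buckets = kmem_buckets_create("foo", SLAB_ACCOUNT, 0, INT_MAX, NULL);
	return 0;
}
subsys_initcall(foo_buckets_init);

static void *foo_alloc(size_t len)
{
	/* With CONFIG_SLAB_BUCKETS=n this behaves like a plain kmalloc(). */
	return kmem_buckets_alloc(foo_buckets, len, GFP_KERNEL);
}

Note that DECL_BUCKET_PARAMS()/PASS_BUCKET_PARAMS() make the kmem_buckets * argument of the internal entry points disappear entirely when CONFIG_SLAB_BUCKETS=n, so ordinary kmalloc() callers never pass or even see it.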
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index d0a0e877cadd..c7be0c792647 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -42,6 +42,17 @@ struct msg_msgseg {
#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
+static kmem_buckets *msg_buckets __ro_after_init;
+
+static int __init init_msg_buckets(void)
+{
+ msg_buckets = kmem_buckets_create("msg_msg", SLAB_ACCOUNT,
+ sizeof(struct msg_msg),
+ DATALEN_MSG, NULL);
+
+ return 0;
+}
+subsys_initcall(init_msg_buckets);
static struct msg_msg *alloc_msg(size_t len)
{
@@ -50,7 +61,7 @@ static struct msg_msg *alloc_msg(size_t len)
size_t alen;
alen = min(len, DATALEN_MSG);
- msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
+ msg = kmem_buckets_alloc(msg_buckets, sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
return NULL;
diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config
index 8a7ce7a6b3ab..3fabb8f55ef6 100644
--- a/kernel/configs/hardening.config
+++ b/kernel/configs/hardening.config
@@ -20,6 +20,7 @@ CONFIG_RANDOMIZE_MEMORY=y
# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_SLAB_BUCKETS=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
CONFIG_RANDOM_KMALLOC_CACHES=y
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index e17d520f532c..9a7f9367b8f2 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -234,8 +234,6 @@ static void fortify_test_alloc_size_##allocator##_dynamic(struct kunit *test) \
checker(expected_size, \
kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \
kfree(p)); \
- checker(expected_size, __kmalloc(alloc_size, gfp), \
- kfree(p)); \
\
orig = kmalloc(alloc_size, gfp); \
KUNIT_EXPECT_TRUE(test, orig != NULL); \
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 4ce960438806..e6667a28c014 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -140,7 +140,7 @@ static void test_kmalloc_redzone_access(struct kunit *test)
{
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
- u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
+ u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18);
kasan_disable_current();
diff --git a/mm/Kconfig b/mm/Kconfig
index b4cb45255a54..e0dfb268717c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -273,6 +273,23 @@ config SLAB_FREELIST_HARDENED
sacrifices to harden the kernel slab allocator against common
freelist exploit methods.
+config SLAB_BUCKETS
+ bool "Support allocation from separate kmalloc buckets"
+ depends on !SLUB_TINY
+ default SLAB_FREELIST_HARDENED
+ help
+ Kernel heap attacks frequently depend on being able to create
+ specifically-sized allocations with user-controlled contents
+ that will be allocated into the same kmalloc bucket as a
+ target object. To avoid sharing these allocation buckets,
+ provide an explicitly separated set of buckets to be used for
+ user-controlled allocations. This may very slightly increase
+ memory fragmentation, though in practice it's only a handful
+ of extra pages since the bulk of user-controlled allocations
+ are relatively long-lived.
+
+ If unsure, say Y.
+
config SLUB_STATS
default n
bool "Enable performance statistics"
diff --git a/mm/slab.h b/mm/slab.h
index a240945487e0..ece18ef5dd04 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -168,7 +168,7 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
*/
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
- return folio_test_active((struct folio *)slab_folio(slab));
+ return folio_test_active(slab_folio(slab));
}
static inline void slab_set_pfmemalloc(struct slab *slab)
@@ -213,7 +213,7 @@ static inline struct slab *virt_to_slab(const void *addr)
static inline int slab_order(const struct slab *slab)
{
- return folio_order((struct folio *)slab_folio(slab));
+ return folio_order(slab_folio(slab));
}
static inline size_t slab_size(const struct slab *slab)
@@ -405,16 +405,18 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
*/
static inline struct kmem_cache *
-kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
+kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
unsigned int index;
+ if (!b)
+ b = &kmalloc_caches[kmalloc_type(flags, caller)];
if (size <= 192)
index = kmalloc_size_index[size_index_elem(size)];
else
index = fls(size - 1);
- return kmalloc_caches[kmalloc_type(flags, caller)][index];
+ return (*b)[index];
}
gfp_t kmalloc_fix_flags(gfp_t flags);
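
A brief worked trace of the bucket-aware lookup above (illustrative only, assuming the default KMALLOC_NORMAL type; msg_buckets is the set created in the ipc/msgutil.c hunk of this diff):

/*
 * kmalloc_slab(100, NULL, GFP_KERNEL, _RET_IP_)
 *     b     = &kmalloc_caches[kmalloc_type(GFP_KERNEL, _RET_IP_)]
 *     index = kmalloc_size_index[size_index_elem(100)]    (size <= 192)
 *     -> the shared kmalloc-128 cache
 *
 * kmalloc_slab(100, msg_buckets, GFP_KERNEL, _RET_IP_)
 *     same index, but taken from (*msg_buckets)[index],
 *     i.e. the dedicated msg_msg-128 cache.
 */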
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1560a1546bb1..70943a4c1c4b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -392,6 +392,98 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
}
EXPORT_SYMBOL(kmem_cache_create);
+static struct kmem_cache *kmem_buckets_cache __ro_after_init;
+
+/**
+ * kmem_buckets_create - Create a set of caches that handle dynamic sized
+ * allocations via kmem_buckets_alloc()
+ * @name: A prefix string which is used in /proc/slabinfo to identify this
+ * cache. The individual caches will have their sizes as the suffix.
+ * @flags: SLAB flags (see kmem_cache_create() for details).
+ * @useroffset: Starting offset within an allocation that may be copied
+ * to/from userspace.
+ * @usersize: How many bytes, starting at @useroffset, may be copied
+ * to/from userspace.
+ * @ctor: A constructor for the objects, run when new allocations are made.
+ *
+ * Cannot be called within an interrupt, but can be interrupted.
+ *
+ * Return: a pointer to the cache on success, NULL on failure. When
+ * CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and
+ * subsequent calls to kmem_buckets_alloc() will fall back to kmalloc().
+ * (i.e. callers only need to check for NULL on failure.)
+ */
+kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
+ unsigned int useroffset,
+ unsigned int usersize,
+ void (*ctor)(void *))
+{
+ kmem_buckets *b;
+ int idx;
+
+ /*
+ * When the separate buckets API is not built in, just return
+ * a non-NULL value for the kmem_buckets pointer, which will be
+ * unused when performing allocations.
+ */
+ if (!IS_ENABLED(CONFIG_SLAB_BUCKETS))
+ return ZERO_SIZE_PTR;
+
+ if (WARN_ON(!kmem_buckets_cache))
+ return NULL;
+
+ b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
+ if (WARN_ON(!b))
+ return NULL;
+
+ flags |= SLAB_NO_MERGE;
+
+ for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+ char *short_size, *cache_name;
+ unsigned int cache_useroffset, cache_usersize;
+ unsigned int size;
+
+ if (!kmalloc_caches[KMALLOC_NORMAL][idx])
+ continue;
+
+ size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
+ if (!size)
+ continue;
+
+ short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
+ if (WARN_ON(!short_size))
+ goto fail;
+
+ cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
+ if (WARN_ON(!cache_name))
+ goto fail;
+
+ if (useroffset >= size) {
+ cache_useroffset = 0;
+ cache_usersize = 0;
+ } else {
+ cache_useroffset = useroffset;
+ cache_usersize = min(size - cache_useroffset, usersize);
+ }
+ (*b)[idx] = kmem_cache_create_usercopy(cache_name, size,
+ 0, flags, cache_useroffset,
+ cache_usersize, ctor);
+ kfree(cache_name);
+ if (WARN_ON(!(*b)[idx]))
+ goto fail;
+ }
+
+ return b;
+
+fail:
+ for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++)
+ kmem_cache_destroy((*b)[idx]);
+ kfree(b);
+
+ return NULL;
+}
+EXPORT_SYMBOL(kmem_buckets_create);
+
#ifdef SLAB_SUPPORTS_SYSFS
/*
* For a given kmem_cache, kmem_cache_destroy() should only be called
@@ -617,11 +709,12 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
s->size = s->object_size = size;
/*
- * For power of two sizes, guarantee natural alignment for kmalloc
- * caches, regardless of SL*B debugging options.
+ * kmalloc caches guarantee alignment of at least the largest
+ * power-of-two divisor of the size. For power-of-two sizes,
+ * it is the size itself.
*/
- if (is_power_of_2(size))
- align = max(align, size);
+ if (flags & SLAB_KMALLOC)
+ align = max(align, 1U << (ffs(size) - 1));
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
@@ -653,8 +746,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
return s;
}
-struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
+kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init =
{ /* initialization for https://llvm.org/pr42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
@@ -703,7 +795,7 @@ size_t kmalloc_size_roundup(size_t size)
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
- return kmalloc_slab(size, GFP_KERNEL, 0)->object_size;
+ return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size;
}
/* Above the smaller buckets, size is a multiple of page size. */
@@ -932,6 +1024,11 @@ void __init create_kmalloc_caches(void)
/* Kmalloc array is now usable */
slab_state = UP;
+
+ if (IS_ENABLED(CONFIG_SLAB_BUCKETS))
+ kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
+ sizeof(kmem_buckets),
+ 0, SLAB_NO_MERGE, NULL);
}
/**
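
The create_boot_cache() hunk above derives the minimum alignment from the lowest set bit of the size; a tiny sketch (not from the patch) of what 1U << (ffs(size) - 1) evaluates to:

#include <linux/bitops.h>

/* ffs() is 1-based, so this isolates the lowest set bit of size, which is
 * the largest power of two dividing it:
 *   96  (0b01100000)  -> 32
 *   192 (0b11000000)  -> 64
 *   256 (0b100000000) -> 256
 */
static unsigned int largest_pow2_divisor(unsigned int size)
{
	return 1U << (ffs(size) - 1);
}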
diff --git a/mm/slub.c b/mm/slub.c
index ab199b67bcd0..829a1f08e8a2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -788,8 +788,24 @@ static bool slab_add_kunit_errors(void)
kunit_put_resource(resource);
return true;
}
+
+static bool slab_in_kunit_test(void)
+{
+ struct kunit_resource *resource;
+
+ if (!kunit_get_current_test())
+ return false;
+
+ resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
+ if (!resource)
+ return false;
+
+ kunit_put_resource(resource);
+ return true;
+}
#else
static inline bool slab_add_kunit_errors(void) { return false; }
+static inline bool slab_in_kunit_test(void) { return false; }
#endif
static inline unsigned int size_from_object(struct kmem_cache *s)
@@ -962,11 +978,9 @@ void print_tracking(struct kmem_cache *s, void *object)
static void print_slab_info(const struct slab *slab)
{
- struct folio *folio = (struct folio *)slab_folio(slab);
-
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
- folio_flags(folio, 0));
+ &slab->__page_flags);
}
/*
@@ -1192,8 +1206,6 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
- print_trailer(s, slab, object);
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
restore_bytes(s, what, value, fault, end);
@@ -1216,8 +1228,8 @@ skip_bug_print:
* Padding is extended by another word if Redzoning is enabled and
* object_size == inuse.
*
- * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
- * 0xcc (RED_ACTIVE) for objects in use.
+ * We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
+ * 0xcc (SLUB_RED_ACTIVE) for objects in use.
*
* object + s->inuse
* Meta data starts here.
@@ -1302,15 +1314,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
u8 *p = object;
u8 *endobject = object + s->object_size;
unsigned int orig_size, kasan_meta_size;
+ int ret = 1;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
- return 0;
+ ret = 0;
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
- return 0;
+ ret = 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
orig_size = get_orig_size(s, object);
@@ -1319,14 +1332,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
val, s->object_size - orig_size)) {
- return 0;
+ ret = 0;
}
}
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
- check_bytes_and_report(s, slab, p, "Alignment padding",
+ if (!check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
- s->inuse - s->object_size);
+ s->inuse - s->object_size))
+ ret = 0;
}
}
@@ -1342,27 +1356,25 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, p, "Poison",
p + kasan_meta_size, POISON_FREE,
s->object_size - kasan_meta_size - 1))
- return 0;
+ ret = 0;
if (kasan_meta_size < s->object_size &&
!check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1))
- return 0;
+ ret = 0;
}
/*
* check_pad_bytes cleans up on its own.
*/
- check_pad_bytes(s, slab, p);
+ if (!check_pad_bytes(s, slab, p))
+ ret = 0;
}
- if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
- /*
- * Object and freepointer overlap. Cannot check
- * freepointer while object is allocated.
- */
- return 1;
-
- /* Check free pointer validity */
- if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
+ /*
+ * Cannot check freepointer while object is allocated if
+ * object and freepointer overlap.
+ */
+ if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) &&
+ !check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
@@ -1370,9 +1382,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
* another error because the object count is now wrong.
*/
set_freepointer(s, p, NULL);
- return 0;
+ ret = 0;
}
- return 1;
+
+ if (!ret && !slab_in_kunit_test()) {
+ print_trailer(s, slab, object);
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ }
+
+ return ret;
}
static int check_slab(struct kmem_cache *s, struct slab *slab)
@@ -2554,7 +2572,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
*/
static inline bool slab_test_node_partial(const struct slab *slab)
{
- return folio_test_workingset((struct folio *)slab_folio(slab));
+ return folio_test_workingset(slab_folio(slab));
}
static inline void slab_set_node_partial(struct slab *slab)
@@ -4063,7 +4081,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
-static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
+static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct folio *folio;
void *ptr = NULL;
@@ -4088,35 +4106,35 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
return ptr;
}
-void *kmalloc_large_noprof(size_t size, gfp_t flags)
+void *__kmalloc_large_noprof(size_t size, gfp_t flags)
{
- void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
+ void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large_noprof);
+EXPORT_SYMBOL(__kmalloc_large_noprof);
-void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
+void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
{
- void *ret = __kmalloc_large_node(size, flags, node);
+ void *ret = ___kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large_node_noprof);
+EXPORT_SYMBOL(__kmalloc_large_node_noprof);
static __always_inline
-void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
+void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
- ret = __kmalloc_large_node(size, flags, node);
+ ret = __kmalloc_large_node_noprof(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
@@ -4125,34 +4143,34 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
if (unlikely(!size))
return ZERO_SIZE_PTR;
- s = kmalloc_slab(size, flags, caller);
+ s = kmalloc_slab(size, b, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
-
-void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
+void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
- return __do_kmalloc_node(size, flags, node, _RET_IP_);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node_noprof);
void *__kmalloc_noprof(size_t size, gfp_t flags)
{
- return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
+ return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_noprof);
-void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags,
- int node, unsigned long caller)
+void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
+ int node, unsigned long caller)
{
- return __do_kmalloc_node(size, flags, node, caller);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
+
}
-EXPORT_SYMBOL(kmalloc_node_track_caller_noprof);
+EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
-void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
@@ -4162,10 +4180,10 @@ void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_trace_noprof);
+EXPORT_SYMBOL(__kmalloc_cache_noprof);
-void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size)
+void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
+ int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
@@ -4174,7 +4192,7 @@ void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_node_trace_noprof);
+EXPORT_SYMBOL(__kmalloc_cache_node_noprof);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
@@ -5159,10 +5177,9 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
- if (slub_debug_orig_size(s) ||
- (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
- ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
- s->ctor) {
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor ||
+ ((flags & SLAB_RED_ZONE) &&
+ (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
@@ -5170,7 +5187,9 @@ static int calculate_sizes(struct kmem_cache *s)
*
* This is the case if we do RCU, have a constructor or
* destructor, are poisoning the objects, or are
- * redzoning an object smaller than sizeof(void *).
+ * redzoning an object smaller than sizeof(void *) or are
+ * redzoning an object with slub_debug_orig_size() enabled,
+ * in which case the right redzone may be extended.
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
diff --git a/mm/util.c b/mm/util.c
index fe723241b66f..d87e73692cf5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -198,6 +198,16 @@ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
}
EXPORT_SYMBOL(kmemdup_nul);
+static kmem_buckets *user_buckets __ro_after_init;
+
+static int __init init_user_buckets(void)
+{
+ user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL);
+
+ return 0;
+}
+subsys_initcall(init_user_buckets);
+
/**
* memdup_user - duplicate memory region from user space
*
@@ -211,7 +221,7 @@ void *memdup_user(const void __user *src, size_t len)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+ p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -237,7 +247,7 @@ void *vmemdup_user(const void __user *src, size_t len)
{
void *p;
- p = kvmalloc(len, GFP_USER);
+ p = kmem_buckets_valloc(user_buckets, len, GFP_USER);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -594,9 +604,10 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
EXPORT_SYMBOL(vm_mmap);
/**
- * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
* failure, fall back to non-contiguous (vmalloc) allocation.
* @size: size of the request.
+ * @b: which set of kmalloc buckets to allocate from.
* @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
* @node: numa node to allocate from
*
@@ -609,7 +620,7 @@ EXPORT_SYMBOL(vm_mmap);
*
 * Return: pointer to the allocated memory or %NULL in case of failure
*/
-void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
gfp_t kmalloc_flags = flags;
void *ret;
@@ -631,7 +642,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
kmalloc_flags &= ~__GFP_NOFAIL;
}
- ret = kmalloc_node_noprof(size, kmalloc_flags, node);
+ ret = __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, b), kmalloc_flags, node);
/*
* It doesn't really make sense to fallback to vmalloc for sub page
@@ -660,7 +671,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
-EXPORT_SYMBOL(kvmalloc_node_noprof);
+EXPORT_SYMBOL(__kvmalloc_node_noprof);
/**
* kvfree() - Free memory.
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs
index 229642960cd1..e6ea601f38c6 100644
--- a/rust/kernel/alloc/allocator.rs
+++ b/rust/kernel/alloc/allocator.rs
@@ -18,23 +18,16 @@ pub(crate) unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: F
// Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
let layout = new_layout.pad_to_align();
- let mut size = layout.size();
-
- if layout.align() > bindings::ARCH_SLAB_MINALIGN {
- // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
- // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
- // more information).
- //
- // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
- // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
- size = size.next_power_of_two();
- }
+ // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()`
+ // which together with the slab guarantees means the `krealloc` will return a properly aligned
+ // object (see comments in `kmalloc()` for more information).
+ let size = layout.size();
// SAFETY:
// - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
// function safety requirement.
- // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
- // according to the function safety requirement) or a result from `next_power_of_two()`.
+ // - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according
+ // to the function safety requirement)
unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 }
}
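
The reasoning behind dropping the next_power_of_two() round-up in the Rust allocator: pad_to_align() makes the size a multiple of the (power-of-two) alignment, so the largest power-of-two divisor of the padded size is at least that alignment, and the strengthened kmalloc guarantee already covers it. Two illustrative cases, sketched as a comment:

/*
 * Layout { size: 40,  align: 64 } -> pad_to_align() gives size 64;
 *     largest power-of-two divisor of 64 is 64  >= align.
 * Layout { size: 200, align: 32 } -> pad_to_align() gives size 224 = 32 * 7;
 *     largest power-of-two divisor of 224 is 32 >= align.
 * So krealloc(ptr, size, flags) already returns suitably aligned memory.
 */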
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 95a59ac78f82..2791f8195203 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1729,6 +1729,7 @@ sub dump_function($$) {
$prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
$prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
$prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
+ $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/;
my $define = $prototype =~ s/^#\s*define\s+//; #ak added
$prototype =~ s/__attribute_const__ +//;
$prototype =~ s/__attribute__\s*\(\(
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index 2e6338ac5eed..e530e54046c9 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -47,11 +47,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
-#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
-#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
-
-#define SLUB_RED_INACTIVE 0xbb
-#define SLUB_RED_ACTIVE 0xcc
+#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
+#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */