summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/hashtab.c84
-rw-r--r--kernel/bpf/verifier.c7
2 files changed, 77 insertions, 14 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650d52fc..570eeca7bdfa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
struct bucket *buckets;
void *elems;
struct pcpu_freelist freelist;
+ void __percpu *extra_elems;
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
};
+enum extra_elem_state {
+ HTAB_NOT_AN_EXTRA_ELEM = 0,
+ HTAB_EXTRA_ELEM_FREE,
+ HTAB_EXTRA_ELEM_USED
+};
+
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
@@ -38,7 +45,10 @@ struct htab_elem {
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
};
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ enum extra_elem_state state;
+ };
u32 hash;
char key[0] __aligned(8);
};
@@ -113,6 +123,23 @@ free_elems:
return err;
}
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+ void __percpu *pptr;
+ int cpu;
+
+ pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+ if (!pptr)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+ HTAB_EXTRA_ELEM_FREE;
+ }
+ htab->extra_elems = pptr;
+ return 0;
+}
+
/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (percpu)
cost += (u64) round_up(htab->map.value_size, 8) *
num_possible_cpus() * htab->map.max_entries;
+ else
+ cost += (u64) htab->elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->buckets[i].lock);
}
+ if (!percpu) {
+ err = alloc_extra_elems(htab);
+ if (err)
+ goto free_buckets;
+ }
+
if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
err = prealloc_elems_and_freelist(htab);
if (err)
- goto free_buckets;
+ goto free_extra_elems;
}
return &htab->map;
+free_extra_elems:
+ free_percpu(htab->extra_elems);
free_buckets:
kvfree(htab->buckets);
free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
kfree(l);
-
}
static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
+ if (l->state == HTAB_EXTRA_ELEM_USED) {
+ l->state = HTAB_EXTRA_ELEM_FREE;
+ return;
+ }
+
if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
- bool percpu, bool onallcpus)
+ bool percpu, bool onallcpus,
+ bool old_elem_exists)
{
u32 size = htab->map.value_size;
bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
struct htab_elem *l_new;
void __percpu *pptr;
+ int err = 0;
if (prealloc) {
l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
if (!l_new)
- return ERR_PTR(-E2BIG);
+ err = -E2BIG;
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
- return ERR_PTR(-E2BIG);
+ err = -E2BIG;
+ } else {
+ l_new = kmalloc(htab->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!l_new)
+ return ERR_PTR(-ENOMEM);
}
- l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
- if (!l_new)
- return ERR_PTR(-ENOMEM);
+ }
+
+ if (err) {
+ if (!old_elem_exists)
+ return ERR_PTR(err);
+
+ /* if we're updating the existing element and the hash table
+ * is full, use per-cpu extra elems
+ */
+ l_new = this_cpu_ptr(htab->extra_elems);
+ if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+ return ERR_PTR(-E2BIG);
+ l_new->state = HTAB_EXTRA_ELEM_USED;
+ } else {
+ l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
}
memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (ret)
goto err;
- l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+ l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+ !!l_old);
if (IS_ERR(l_new)) {
/* all pre-allocated elements are in use or memory exhausted */
ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
}
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, true, onallcpus);
+ hash, true, onallcpus, false);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
htab_free_elems(htab);
pcpu_freelist_destroy(&htab->freelist);
}
+ free_percpu(htab->extra_elems);
kvfree(htab->buckets);
kfree(htab);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f72f23b8fdab..daea765d72e6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,7 @@ struct verifier_env {
struct verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
+ u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
};
@@ -1052,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
- if (func_id != BPF_FUNC_skb_in_cgroup)
+ if (func_id != BPF_FUNC_skb_under_cgroup)
goto error;
break;
default:
@@ -1074,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
- case BPF_FUNC_skb_in_cgroup:
+ case BPF_FUNC_skb_under_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
break;
@@ -1301,7 +1302,7 @@ add_imm:
/* dst_reg stays as pkt_ptr type and since some positive
* integer value was added to the pointer, increment its 'id'
*/
- dst_reg->id++;
+ dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range and off to zero */
dst_reg->off = 0;