diff options
author | David S. Miller <davem@davemloft.net> | 2017-03-15 15:13:13 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-03-15 15:13:13 -0700 |
commit | e11607aad5edf4c41617a27291731c660f7d519d (patch) | |
tree | 4de77f06a44efb3a82d0600968ce94bac1078460 /net | |
parent | 3d20f1f7bd575d147ffa75621fa560eea0aec690 (diff) | |
parent | 4494dbc6dec37817f2cc2aa7604039a9e87ada18 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says:
====================
Netfilter fixes for net
The following patchset contains Netfilter fixes for your net tree, a
rather large batch of fixes targeted to nf_tables, conntrack and bridge
netfilter. More specifically, they are:
1) Don't track fragmented packets if the socket option IP_NODEFRAG is set.
From Florian Westphal.
2) SCTP protocol tracker assumes that ICMP error messages contain the
checksum field, what results in packet drops. From Ying Xue.
3) Fix inconsistent handling of AH traffic from nf_tables.
4) Fix new bitmap set representation with big endian. Fix mismatches in
nf_tables due to incorrect big endian handling too. Both patches
from Liping Zhang.
5) Bridge netfilter doesn't honor maximum fragment size field, cap to
largest fragment seen. From Florian Westphal.
6) Fake conntrack entry needs to be aligned to 8 bytes since the 3 LSB
bits are now used to store the ctinfo. From Steven Rostedt.
7) Fix element comments with the bitmap set type. Revert the flush
field in the nft_set_iter structure, not required anymore after
fixing up element comments.
8) Missing error on invalid conntrack direction from nft_ct, also from
Liping Zhang.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/br_netfilter_hooks.c | 12 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_masq_ipv4.c | 8 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_redir_ipv4.c | 8 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_masq_ipv6.c | 8 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_redir_ipv6.c | 8 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 6 | ||||
-rw-r--r-- | net/netfilter/nf_nat_proto_sctp.c | 13 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 4 | ||||
-rw-r--r-- | net/netfilter/nft_ct.c | 21 | ||||
-rw-r--r-- | net/netfilter/nft_meta.c | 40 | ||||
-rw-r--r-- | net/netfilter/nft_nat.c | 8 | ||||
-rw-r--r-- | net/netfilter/nft_set_bitmap.c | 165 |
14 files changed, 160 insertions, 150 deletions
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index fa87fbd62bb7..1f1e62095464 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -706,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bc1486f2c064..2e14ed11a35c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b..6f5e8d01b876 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); - /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. Hence it's the user's responsibilty to diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index a0ea8aad1bf1..f18677277119 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), &range, nft_out(pkt)); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 1650ed23c15d..5120be1d3118 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - mr.range[0].max.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + mr.range[0].min.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + mr.range[0].max.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 6c5b5b1830a7..4146536e9c15 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, nft_out(pkt)); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index f5ac080fc084..a27e424f690d 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min], - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max], + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb..ffb78e5f7b70 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; -DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used + * for the nfctinfo. We cheat by (ab)using the PER CPU cache line + * alignment to enforce this. + */ +DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static unsigned int nf_conntrack_hash_rnd __read_mostly; diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 31d358691af0..804e8a0ab36e 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb, enum nf_nat_manip_type maniptype) { sctp_sctphdr_t *hdr; + int hdrsize = 8; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + /* This could be an inner header returned in imcp packet; in such + * cases we cannot update the checksum field since it is outside + * of the 8 bytes of transport layer headers we are guaranteed. + */ + if (skb->len >= hdroff + sizeof(*hdr)) + hdrsize = sizeof(*hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) return false; hdr = (struct sctphdr *)(skb->data + hdroff); @@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb, hdr->dest = tuple->dst.u.sctp.port; } + if (hdrsize < sizeof(*hdr)) + return true; + if (skb->ip_summed != CHECKSUM_PARTIAL) { hdr->checksum = sctp_compute_cksum(skb, hdroff); skb->ip_summed = CHECKSUM_NONE; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e0ccfd5bb37..434c739dfeca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_set_iter iter = { .genmask = genmask, .fn = nft_flush_set, - .flush = true, }; set->ops->walk(&ctx, set, &iter); @@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bf548a7a71ec..0264258c46fe 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - *dest = CTINFO2DIR(ctinfo); + nft_reg_store8(dest, CTINFO2DIR(ctinfo)); return; case NFT_CT_STATUS: *dest = ct->status; @@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); + nft_reg_store8(dest, nf_ct_l3num(ct)); return; case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); + nft_reg_store8(dest, nf_ct_protonum(ct)); return; #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: { const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + u16 zoneid; if (priv->dir < IP_CT_DIR_MAX) - *dest = nf_ct_zone_id(zone, priv->dir); + zoneid = nf_ct_zone_id(zone, priv->dir); else - *dest = zone->id; + zoneid = zone->id; + nft_reg_store16(dest, zoneid); return; } #endif @@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTO_SRC: - *dest = (__force __u16)tuple->src.u.all; + nft_reg_store16(dest, (__force u16)tuple->src.u.all); return; case NFT_CT_PROTO_DST: - *dest = (__force __u16)tuple->dst.u.all; + nft_reg_store16(dest, (__force u16)tuple->dst.u.all); return; default: break; @@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; enum ip_conntrack_info ctinfo; - u16 value = regs->data[priv->sreg]; + u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); @@ -542,7 +544,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err1; } } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e1f5ca9b423b..7b60e01f38ff 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - *dest = 0; - *(__be16 *)dest = skb->protocol; + nft_reg_store16(dest, (__force u16)skb->protocol); break; case NFT_META_NFPROTO: - *dest = nft_pf(pkt); + nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) goto err; - *dest = pkt->tprot; + nft_reg_store8(dest, pkt->tprot); break; case NFT_META_PRIORITY: *dest = skb->priority; @@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_IIFTYPE: if (in == NULL) goto err; - *dest = 0; - *(u16 *)dest = in->type; + nft_reg_store16(dest, in->type); break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *dest = 0; - *(u16 *)dest = out->type; + nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: sk = skb_to_full_sk(skb); @@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr, #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - *dest = skb->pkt_type; + nft_reg_store8(dest, skb->pkt_type); break; } switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; case NFPROTO_IPV6: - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; case NFPROTO_NETDEV: switch (skb->protocol) { @@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; if (ipv4_is_multicast(iph->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; } case htons(ETH_P_IPV6): - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; default: WARN_ON_ONCE(1); @@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = regs->data[meta->sreg]; + u32 *sreg = ®s->data[meta->sreg]; + u32 value = *sreg; + u8 pkt_type; switch (meta->key) { case NFT_META_MARK: @@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - if (skb->pkt_type != value && - skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = value; + pkt_type = nft_reg_load8(sreg); + + if (skb->pkt_type != pkt_type && + skb_pkt_type_ok(pkt_type) && + skb_pkt_type_ok(skb->pkt_type)) + skb->pkt_type = pkt_type; break; case NFT_META_NFTRACE: skb->nf_trace = !!value; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 19a7bf3236f9..439e0bd152a0 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 152d226552c1..8ebbc2940f4c 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -15,6 +15,11 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +struct nft_bitmap_elem { + struct list_head head; + struct nft_set_ext ext; +}; + /* This bitmap uses two bits to represent one element. These two bits determine * the element state in the current and the future generation. * @@ -41,13 +46,22 @@ * restore its previous state. */ struct nft_bitmap { - u16 bitmap_size; - u8 bitmap[]; + struct list_head list; + u16 bitmap_size; + u8 bitmap[]; }; -static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off) +static inline void nft_bitmap_location(const struct nft_set *set, + const void *key, + u32 *idx, u32 *off) { - u32 k = (key << 1); + u32 k; + + if (set->klen == 2) + k = *(u16 *)key; + else + k = *(u8 *)key; + k <<= 1; *idx = k / BITS_PER_BYTE; *off = k % BITS_PER_BYTE; @@ -69,26 +83,48 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_cur(net); u32 idx, off; - nft_bitmap_location(*key, &idx, &off); + nft_bitmap_location(set, key, &idx, &off); return nft_bitmap_active(priv->bitmap, idx, off, genmask); } +static struct nft_bitmap_elem * +nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, + u8 genmask) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), + nft_set_ext_key(&this->ext), set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + } + return NULL; +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **_ext) + struct nft_set_ext **ext) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); - if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + be = nft_bitmap_elem_find(set, new, genmask); + if (be) { + *ext = &be->ext; return -EEXIST; + } + nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off); /* Enter 01 state. */ priv->bitmap[idx] |= (genmask << off); + list_add_tail_rcu(&new->head, &priv->list); return 0; } @@ -98,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); + list_del_rcu(&be->head); } static void nft_bitmap_activate(const struct net *net, @@ -112,74 +149,52 @@ static void nft_bitmap_activate(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); + nft_set_elem_change_active(net, set, &be->ext); } static bool nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *ext) + const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); + struct nft_bitmap_elem *be = _be; u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); return true; } -static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) - return NULL; - - nft_set_ext_init(ext, &tmpl); - memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen); - - return ext; -} - static void *nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); - struct nft_set_ext *ext; - u32 idx, off, key = 0; - - memcpy(&key, elem->key.val.data, set->klen); - nft_bitmap_location(key, &idx, &off); + u32 idx, off; - if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) - return NULL; + nft_bitmap_location(set, elem->key.val.data, &idx, &off); - /* We have no real set extension since this is a bitmap, allocate this - * dummy object that is released from the commit/abort path. - */ - ext = nft_bitmap_ext_alloc(set, elem); - if (!ext) + be = nft_bitmap_elem_find(set, this, genmask); + if (!be) return NULL; /* Enter 10 state. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); - return ext; + return be; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -187,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext_tmpl tmpl; + struct nft_bitmap_elem *be; struct nft_set_elem elem; - struct nft_set_ext *ext; - int idx, off; - u16 key; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - for (idx = 0; idx < priv->bitmap_size; idx++) { - for (off = 0; off < BITS_PER_BYTE; off += 2) { - if (iter->count < iter->skip) - goto cont; - - if (!nft_bitmap_active(priv->bitmap, idx, off, - iter->genmask)) - goto cont; - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) { - iter->err = -ENOMEM; - return; - } - nft_set_ext_init(ext, &tmpl); - key = ((idx * BITS_PER_BYTE) + off) >> 1; - memcpy(nft_set_ext_key(ext), &key, set->klen); - - elem.priv = ext; - iter->err = iter->fn(ctx, set, iter, &elem); - - /* On set flush, this dummy extension object is released - * from the commit/abort path. - */ - if (!iter->flush) - kfree(ext); - - if (iter->err < 0) - return; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (iter->count < iter->skip) + goto cont; + if (!nft_set_elem_active(&be->ext, iter->genmask)) + goto cont; + + elem.priv = be; + + iter->err = iter->fn(ctx, set, iter, &elem); + + if (iter->err < 0) + return; cont: - iter->count++; - } + iter->count++; } } @@ -258,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); return 0; @@ -283,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_bitmap_ops __read_mostly = { .privsize = nft_bitmap_privsize, + .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, .init = nft_bitmap_init, .destroy = nft_bitmap_destroy, |