author:    Jakub Kicinski <kuba@kernel.org>  2022-01-09 15:59:22 -0800
committer: Jakub Kicinski <kuba@kernel.org>  2022-01-09 15:59:23 -0800
commit:    77bbcb60f734fe25e1d17a7bdc9a4260868efb0f
tree:      aec2d1cfd0e0872ca3cffa30b2198014f9feb67e /net/netfilter
parent:    9f3248c9dd51912ded87562a892ad947da2eebfa
parent:    4a80e026981b791da3937470ace84796490c7796
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for net-next. This
includes one patch to update ovs and act_ct to use nf_ct_put() instead
of nf_conntrack_put().
1) Add netns_tracker to nfnetlink_log and masquerade, from Eric Dumazet.
2) Remove redundant rcu read-side lock in nf_tables packet path.
3) Replace BUG() by WARN_ON_ONCE() in nft_payload.
4) Consolidate rule verdict tracing.
5) Replace WARN_ON() by WARN_ON_ONCE() in nf_tables core.
6) Make counter support built-in in nf_tables.
7) Add new field to conntrack object to identify locally generated
traffic, from Florian Westphal.
8) Prevent NAT from shadowing well-known ports, from Florian Westphal
   (heuristic sketched below, after this list).
9) Merge nf_flow_table_{ipv4,ipv6} into nf_flow_table_inet, also from
Florian.
10) Remove redundant pointer in nft_pipapo AVX2 support, from Colin Ian King.
11) Replace open-coded max() in conntrack, from Jiapeng Chong.
12) Update conntrack to use the refcount_t API, from Florian Westphal
    (pattern sketched below, after this list).
13) Move ip_ct_attach indirection into the nf_ct_hook structure.
14) Constify several pointer objects in the netfilter codebase,
    from Florian Westphal.
15) Tree-wide replacement of nf_conntrack_put() by nf_ct_put(), also
from Florian.
16) Fix egress splat due to incorrect rcu notation, from Florian.
17) Move stateful fields of connlimit, last, quota, numgen and limit
out of the expression data area.
18) Build a blob to represent the ruleset in nf_tables; this is a
    requirement of the new register tracking infrastructure
    (layout sketched below, after this list).
19) Add NFT_REG32_NUM to define the maximum number of 32-bit registers.
20) Add register tracking infrastructure to skip redundant
    store-to-register operations; this includes support for payload,
    meta and bitwise expressions (idea sketched below).
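
Item 8 above comes down to one comparison in the new tuple_force_port_remap()
helper (visible in the nf_nat_core.c hunk further down). As a quick
illustration, here is a minimal userspace restatement of that heuristic; the
demo harness and names are ours, only the threshold logic mirrors the patch:

  /* Force source-port remapping when a low (likely well-known) source
   * port talks to a high (likely ephemeral) destination port, so an
   * inbound "new" flow cannot shadow an existing outbound one.
   */
  #include <stdbool.h>
  #include <stdio.h>

  static bool force_port_remap(unsigned short sport, unsigned short dport)
  {
  	/* IANA: system ports 1-1023, user ports 1024-49151,
  	 * private ports 49152-65535; Linux ephemeral default is
  	 * 32768-60999, hence the two thresholds below.
  	 */
  	return sport < 16384 && dport >= 32768;
  }

  int main(void)
  {
  	/* 53 -> 35000 looks like a reply path that could shadow: remap. */
  	printf("53 -> 35000: %s\n", force_port_remap(53, 35000) ? "remap" : "keep");
  	/* 40000 -> 53 is an ordinary client query: keep the source port. */
  	printf("40000 -> 53: %s\n", force_port_remap(40000, 53) ? "remap" : "keep");
  	return 0;
  }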
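Item 12 leans on one property of the refcount API: a lookup that races with
teardown may only take a reference while the count is still non-zero, which
is what refcount_inc_not_zero() guarantees and what the SLAB_TYPESAFE_BY_RCU
comments in the diff refer to. A userspace C11 sketch of that inc-not-zero
pattern (types and names are ours, not the kernel's):

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  struct obj {
  	atomic_uint use;	/* stands in for ct->ct_general.use */
  };

  static bool obj_get_unless_zero(struct obj *o)
  {
  	unsigned int old = atomic_load(&o->use);

  	do {
  		if (old == 0)
  			return false;	/* being destroyed, do not resurrect */
  	} while (!atomic_compare_exchange_weak(&o->use, &old, old + 1));

  	return true;	/* reference taken atomically */
  }

  int main(void)
  {
  	struct obj live = { .use = 1 }, dying = { .use = 0 };

  	printf("live:  %s\n", obj_get_unless_zero(&live) ? "got ref" : "skipped");
  	printf("dying: %s\n", obj_get_unless_zero(&dying) ? "got ref" : "skipped");
  	return 0;
  }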
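Item 18 replaces the per-chain NULL-terminated array of rule pointers with
one contiguous blob of variable-length rules terminated by a trailer whose
is_last bit is set, as the nf_tables_api.c and nf_tables_core.c hunks below
show. A toy model of that layout and its walk; the field and helper names
are simplified stand-ins for struct nft_rule_dp and nft_rule_next():

  #include <stdalign.h>
  #include <stdio.h>
  #include <string.h>

  struct rule_dp {
  	unsigned int is_last:1,
  		     dlen:12;	/* bytes of expression data that follow */
  	unsigned char data[];
  };

  /* step over the header plus this rule's expression payload */
  #define rule_next(r) \
  	((struct rule_dp *)((unsigned char *)(r) + sizeof(*(r)) + (r)->dlen))

  int main(void)
  {
  	alignas(struct rule_dp) unsigned char blob[128];
  	struct rule_dp *r = (struct rule_dp *)blob;

  	r->is_last = 0; r->dlen = 8;  memset(r->data, 0xaa, 8);  r = rule_next(r);
  	r->is_last = 0; r->dlen = 16; memset(r->data, 0xbb, 16); r = rule_next(r);
  	r->is_last = 1; r->dlen = 0;	/* trailer, not a real rule */

  	for (r = (struct rule_dp *)blob; !r->is_last; r = rule_next(r))
  		printf("rule with %u bytes of expression data\n", r->dlen);
  	return 0;
  }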
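Item 20's tracking boils down to remembering, per register, which selector
last wrote it, and skipping a store that would write the same selector to
the same register again. A table-driven toy version; the demo is ours, while
the kernel does this per nft expression through the new ->reduce() callbacks:

  #include <stdio.h>
  #include <string.h>

  #define NREGS 4

  int main(void)
  {
  	const char *tracked[NREGS] = { 0 };	/* selector last stored per reg */
  	const struct { int reg; const char *selector; } stores[] = {
  		{ 0, "payload@ip.saddr" },
  		{ 1, "meta.l4proto" },
  		{ 0, "payload@ip.saddr" },	/* redundant: same reg, same selector */
  		{ 0, "payload@ip.daddr" },	/* not redundant: selector changed */
  	};

  	for (size_t i = 0; i < sizeof(stores) / sizeof(stores[0]); i++) {
  		int reg = stores[i].reg;

  		if (tracked[reg] && !strcmp(tracked[reg], stores[i].selector)) {
  			printf("skip store of %s to reg %d\n", stores[i].selector, reg);
  			continue;
  		}
  		tracked[reg] = stores[i].selector;
  		printf("keep store of %s to reg %d\n", stores[i].selector, reg);
  	}
  	return 0;
  }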
* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next: (32 commits)
netfilter: nft_meta: cancel register tracking after meta update
netfilter: nft_payload: cancel register tracking after payload update
netfilter: nft_bitwise: track register operations
netfilter: nft_meta: track register operations
netfilter: nft_payload: track register operations
netfilter: nf_tables: add register tracking infrastructure
netfilter: nf_tables: add NFT_REG32_NUM
netfilter: nf_tables: add rule blob layout
netfilter: nft_limit: move stateful fields out of expression data
netfilter: nft_limit: rename stateful structure
netfilter: nft_numgen: move stateful fields out of expression data
netfilter: nft_quota: move stateful fields out of expression data
netfilter: nft_last: move stateful fields out of expression data
netfilter: nft_connlimit: move stateful fields out of expression data
netfilter: egress: avoid a lockdep splat
net: prefer nf_ct_put instead of nf_conntrack_put
netfilter: conntrack: avoid useless indirection during conntrack destruction
netfilter: make function op structures const
netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
netfilter: conntrack: convert to refcount_t api
...
====================
Link: https://lore.kernel.org/r/20220109231640.104123-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/netfilter')
29 files changed, 786 insertions, 285 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3646fc195e7d..ddc54b6d18ee 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -515,12 +515,6 @@ config NFT_FLOW_OFFLOAD This option adds the "flow_offload" expression that you can use to choose what flows are placed into the hardware. -config NFT_COUNTER - tristate "Netfilter nf_tables counter module" - help - This option adds the "counter" expression that you can use to - include packet and byte counters in a rule. - config NFT_CONNLIMIT tristate "Netfilter nf_tables connlimit module" depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index aab20e575ecd..a135b1a46014 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,7 +75,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \ - nft_chain_route.o nf_tables_offload.o \ + nft_counter.o nft_chain_route.o nf_tables_offload.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ nft_set_pipapo.o @@ -100,7 +100,6 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_REJECT_NETDEV) += nft_reject_netdev.o obj-$(CONFIG_NFT_TUNNEL) += nft_tunnel.o -obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 6dec9cd395f1..354cb472f386 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -666,32 +666,29 @@ EXPORT_SYMBOL(nf_hook_slow_list); /* This needs to be compiled in any case to avoid dependencies between the * nfnetlink_queue code and nf_conntrack. */ -struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; +const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nfnl_ct_hook); -struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; +const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_hook); #if IS_ENABLED(CONFIG_NF_CONNTRACK) -/* This does not belong here, but locally generated errors need it if connection - tracking in use: without this, connection may not be in hash table, and hence - manufactured ICMP or RST packets will not be associated with it. */ -void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) - __rcu __read_mostly; -EXPORT_SYMBOL(ip_ct_attach); - -struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; +const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_hook); +/* This does not belong here, but locally generated errors need it if connection + * tracking in use: without this, connection may not be in hash table, and hence + * manufactured ICMP or RST packets will not be associated with it. 
+ */ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { - void (*attach)(struct sk_buff *, const struct sk_buff *); + const struct nf_ct_hook *ct_hook; if (skb->_nfct) { rcu_read_lock(); - attach = rcu_dereference(ip_ct_attach); - if (attach) - attach(new, skb); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) + ct_hook->attach(new, skb); rcu_read_unlock(); } } @@ -699,7 +696,7 @@ EXPORT_SYMBOL(nf_ct_attach); void nf_conntrack_destroy(struct nf_conntrack *nfct) { - struct nf_ct_hook *ct_hook; + const struct nf_ct_hook *ct_hook; rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); @@ -712,7 +709,7 @@ EXPORT_SYMBOL(nf_conntrack_destroy); bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { - struct nf_ct_hook *ct_hook; + const struct nf_ct_hook *ct_hook; bool ret = false; rcu_read_lock(); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01d6589fba6e..894a325d39f2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -559,7 +559,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) -/* Released via destroy_conntrack() */ +/* Released via nf_ct_destroy() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) @@ -586,7 +586,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); nf_ct_zone_add(tmpl, zone); - atomic_set(&tmpl->ct_general.use, 0); + refcount_set(&tmpl->ct_general.use, 1); return tmpl; } @@ -613,13 +613,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } -static void -destroy_conntrack(struct nf_conntrack *nfct) +void nf_ct_destroy(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - pr_debug("destroy_conntrack(%p)\n", ct); - WARN_ON(atomic_read(&nfct->use) != 0); + pr_debug("%s(%p)\n", __func__, ct); + WARN_ON(refcount_read(&nfct->use) != 0); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -644,9 +643,10 @@ destroy_conntrack(struct nf_conntrack *nfct) if (ct->master) nf_ct_put(ct->master); - pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); + pr_debug("%s: returning ct=%p to slab\n", __func__, ct); nf_conntrack_free(ct); } +EXPORT_SYMBOL(nf_ct_destroy); static void nf_ct_delete_from_lists(struct nf_conn *ct) { @@ -743,7 +743,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { - if (!atomic_inc_not_zero(&ct->ct_general.use)) + if (!refcount_inc_not_zero(&ct->ct_general.use)) return; if (nf_ct_should_gc(ct)) @@ -811,7 +811,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, * in, try to obtain a reference and re-check tuple */ ct = nf_ct_tuplehash_to_ctrack(h); - if (likely(atomic_inc_not_zero(&ct->ct_general.use))) { + if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -908,7 +908,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) smp_wmb(); /* The caller holds a reference to this object */ - atomic_set(&ct->ct_general.use, 2); + refcount_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); @@ -959,7 +959,7 @@ static void 
__nf_conntrack_insert_prepare(struct nf_conn *ct) { struct nf_conn_tstamp *tstamp; - atomic_inc(&ct->ct_general.use); + refcount_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; /* set conntrack timestamp, if enabled. */ @@ -990,7 +990,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_add_to_dying_list(loser_ct); - nf_conntrack_put(&loser_ct->ct_general); + nf_ct_put(loser_ct); nf_ct_set(skb, ct, ctinfo); NF_CT_STAT_INC(net, clash_resolve); @@ -1352,7 +1352,7 @@ static unsigned int early_drop_list(struct net *net, nf_ct_is_dying(tmp)) continue; - if (!atomic_inc_not_zero(&tmp->ct_general.use)) + if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; /* kill only if still in same netns -- might have moved due to @@ -1470,7 +1470,7 @@ static void gc_worker(struct work_struct *work) continue; /* need to take reference to avoid possible races */ - if (!atomic_inc_not_zero(&tmp->ct_general.use)) + if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; if (gc_worker_skip_ct(tmp)) { @@ -1570,7 +1570,7 @@ __nf_conntrack_alloc(struct net *net, /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ - atomic_set(&ct->ct_general.use, 0); + refcount_set(&ct->ct_general.use, 0); return ct; out: atomic_dec(&cnet->count); @@ -1595,7 +1595,7 @@ void nf_conntrack_free(struct nf_conn *ct) /* A freed object has refcnt == 0, that's * the golden rule for SLAB_TYPESAFE_BY_RCU */ - WARN_ON(atomic_read(&ct->ct_general.use) != 0); + WARN_ON(refcount_read(&ct->ct_general.use) != 0); nf_ct_ext_destroy(ct); kmem_cache_free(nf_conntrack_cachep, ct); @@ -1687,8 +1687,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - /* Now it is inserted into the unconfirmed list, bump refcount */ - nf_conntrack_get(&ct->ct_general); + /* Now it is inserted into the unconfirmed list, set refcount to 1. 
*/ + refcount_set(&ct->ct_general.use, 1); nf_ct_add_to_unconfirmed_list(ct); local_bh_enable(); @@ -1748,6 +1748,9 @@ resolve_normal_ct(struct nf_conn *tmpl, return 0; if (IS_ERR(h)) return PTR_ERR(h); + + ct = nf_ct_tuplehash_to_ctrack(h); + ct->local_origin = state->hook == NF_INET_LOCAL_OUT; } ct = nf_ct_tuplehash_to_ctrack(h); @@ -1919,7 +1922,7 @@ repeat: /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put(&ct->ct_general); + nf_ct_put(ct); skb->_nfct = 0; NF_CT_STAT_INC_ATOMIC(state->net, invalid); if (ret == -NF_DROP) @@ -2083,9 +2086,9 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { + const struct nf_nat_hook *nat_hook; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; - struct nf_nat_hook *nat_hook; unsigned int status; int dataoff; u16 l3num; @@ -2298,7 +2301,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), return NULL; found: - atomic_inc(&ct->ct_general.use); + refcount_inc(&ct->ct_general.use); spin_unlock(lockp); local_bh_enable(); return ct; @@ -2453,7 +2456,6 @@ static int kill_all(struct nf_conn *i, void *data) void nf_conntrack_cleanup_start(void) { conntrack_gc_work.exiting = true; - RCU_INIT_POINTER(ip_ct_attach, NULL); } void nf_conntrack_cleanup_end(void) @@ -2771,16 +2773,15 @@ err_cachep: return ret; } -static struct nf_ct_hook nf_conntrack_hook = { +static const struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, - .destroy = destroy_conntrack, + .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, + .attach = nf_conntrack_attach, }; void nf_conntrack_init_end(void) { - /* For use by REJECT target */ - RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 1e89b595ecd0..96948e98ec53 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -203,12 +203,12 @@ nf_ct_find_expectation(struct net *net, * about to invoke ->destroy(), or nf_ct_delete() via timeout * or early_drop(). * - * The atomic_inc_not_zero() check tells: If that fails, we + * The refcount_inc_not_zero() check tells: If that fails, we * know that the ct is being destroyed. If it succeeds, we * can be sure the ct cannot disappear underneath. 
*/ if (unlikely(nf_ct_is_dying(exp->master) || - !atomic_inc_not_zero(&exp->master->ct_general.use))) + !refcount_inc_not_zero(&exp->master->ct_general.use))) return NULL; if (exp->flags & NF_CT_EXPECT_PERMANENT) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0be2a1ae5c17..ac438370f94a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -508,7 +508,7 @@ nla_put_failure: static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use)))) + if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use)))) goto nla_put_failure; return 0; @@ -1198,7 +1198,7 @@ restart: ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { if (i < ARRAY_SIZE(nf_ct_evict) && - atomic_inc_not_zero(&ct->ct_general.use)) + refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; continue; } @@ -1749,7 +1749,7 @@ restart: NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, dying, 0); if (res < 0) { - if (!atomic_inc_not_zero(&ct->ct_general.use)) + if (!refcount_inc_not_zero(&ct->ct_general.use)) continue; cb->args[0] = cpu; cb->args[1] = (unsigned long)ct; @@ -1820,7 +1820,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, const struct nlattr *attr) __must_hold(RCU) { - struct nf_nat_hook *nat_hook; + const struct nf_nat_hook *nat_hook; int err; nat_hook = rcu_dereference(nf_nat_hook); @@ -2922,7 +2922,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); } -static struct nfnl_ct_hook ctnetlink_glue_hook = { +static const struct nfnl_ct_hook ctnetlink_glue_hook = { .build_size = ctnetlink_glue_build_size, .build = ctnetlink_glue_build, .parse = ctnetlink_glue_parse, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 80f675d884b2..3e1afd10a9b6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -303,7 +303,7 @@ static int ct_seq_show(struct seq_file *s, void *v) int ret = 0; WARN_ON(!ct); - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) return 0; if (nf_ct_should_gc(ct)) { @@ -370,7 +370,7 @@ static int ct_seq_show(struct seq_file *s, void *v) ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); - seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); + seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use)); if (seq_has_overflowed(s)) goto release; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index ed37bb9b4e58..b90eca7a2f22 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct) struct flow_offload *flow; if (unlikely(nf_ct_is_dying(ct) || - !atomic_inc_not_zero(&ct->ct_general.use))) + !refcount_inc_not_zero(&ct->ct_general.use))) return NULL; flow = kzalloc(sizeof(*flow), GFP_ATOMIC); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index bc4126d8ef65..5c57ade6bd05 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -54,8 +54,30 @@ static struct nf_flowtable_type flowtable_inet = { .owner = THIS_MODULE, }; +static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, + .init = nf_flow_table_init, + .setup = 
nf_flow_table_offload_setup, + .action = nf_flow_rule_route_ipv4, + .free = nf_flow_table_free, + .hook = nf_flow_offload_ip_hook, + .owner = THIS_MODULE, +}; + +static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, + .init = nf_flow_table_init, + .setup = nf_flow_table_offload_setup, + .action = nf_flow_rule_route_ipv6, + .free = nf_flow_table_free, + .hook = nf_flow_offload_ipv6_hook, + .owner = THIS_MODULE, +}; + static int __init nf_flow_inet_module_init(void) { + nft_register_flowtable_type(&flowtable_ipv4); + nft_register_flowtable_type(&flowtable_ipv6); nft_register_flowtable_type(&flowtable_inet); return 0; @@ -64,6 +86,8 @@ static int __init nf_flow_inet_module_init(void) static void __exit nf_flow_inet_module_exit(void) { nft_unregister_flowtable_type(&flowtable_inet); + nft_unregister_flowtable_type(&flowtable_ipv6); + nft_unregister_flowtable_type(&flowtable_ipv4); } module_init(nf_flow_inet_module_init); @@ -71,5 +95,7 @@ module_exit(nf_flow_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module"); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ab9f6c75524d..2d06a66899b2 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -494,6 +494,38 @@ another_round: goto another_round; } +static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple) +{ + u16 sp, dp; + + switch (tuple->dst.protonum) { + case IPPROTO_TCP: + sp = ntohs(tuple->src.u.tcp.port); + dp = ntohs(tuple->dst.u.tcp.port); + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + sp = ntohs(tuple->src.u.udp.port); + dp = ntohs(tuple->dst.u.udp.port); + break; + default: + return false; + } + + /* IANA: System port range: 1-1023, + * user port range: 1024-49151, + * private port range: 49152-65535. + * + * Linux default ephemeral port range is 32768-60999. + * + * Enforce port remapping if sport is significantly lower + * than dport to prevent NAT port shadowing, i.e. + * accidental match of 'new' inbound connection vs. + * existing outbound one. + */ + return sp < 16384 && dp >= 32768; +} + /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -507,11 +539,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL; const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); + if (maniptype == NF_NAT_MANIP_SRC && + !random_port && + !ct->local_origin) + random_port = tuple_force_port_remap(orig_tuple); + /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -520,8 +558,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * So far, we don't do local source mappings, so multiple * manips not an issue. 
*/ - if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { + if (maniptype == NF_NAT_MANIP_SRC && !random_port) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -545,7 +582,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { + if (!random_port) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, @@ -1130,7 +1167,7 @@ static struct pernet_operations nat_net_ops = { .size = sizeof(struct nat_net), }; -static struct nf_nat_hook nat_hook = { +static const struct nf_nat_hook nat_hook = { .parse_nat_setup = nfnetlink_parse_nat_setup, #ifdef CONFIG_XFRM .decode_session = __nf_nat_decode_session, diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index acd73f717a08..e32fac374608 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -12,6 +12,7 @@ struct masq_dev_work { struct work_struct work; struct net *net; + netns_tracker ns_tracker; union nf_inet_addr addr; int ifindex; int (*iter)(struct nf_conn *i, void *data); @@ -82,7 +83,7 @@ static void iterate_cleanup_work(struct work_struct *work) nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); - put_net(w->net); + put_net_track(w->net, &w->ns_tracker); kfree(w); atomic_dec(&masq_worker_count); module_put(THIS_MODULE); @@ -119,6 +120,7 @@ static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = ifindex; w->net = net; + netns_tracker_alloc(net, &w->ns_tracker, gfp_flags); w->iter = iter; if (addr) w->addr = *addr; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 3d6d49420db8..2dfc5dae0656 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,7 +349,6 @@ static int __net_init synproxy_net_init(struct net *net) goto err2; __set_bit(IPS_CONFIRMED_BIT, &ct->status); - nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c20772822637..eb12fc9b803d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1747,16 +1747,16 @@ static void nft_chain_stats_replace(struct nft_trans *trans) static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) { - struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0); - struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1); + struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0); + struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1); if (g0 != g1) kvfree(g1); kvfree(g0); /* should be NULL either via abort or via successful commit */ - WARN_ON_ONCE(chain->rules_next); - kvfree(chain->rules_next); + WARN_ON_ONCE(chain->blob_next); + kvfree(chain->blob_next); } void nf_tables_chain_destroy(struct nft_ctx *ctx) @@ -2002,23 +2002,39 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) struct nft_rules_old { struct rcu_head h; - struct nft_rule **start; + struct nft_rule_blob *blob; }; -static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain, - unsigned int alloc) +static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) { - if 
(alloc > INT_MAX) + struct nft_rule_dp *prule; + + prule = (struct nft_rule_dp *)ptr; + prule->is_last = 1; + ptr += offsetof(struct nft_rule_dp, data); + /* blob size does not include the trailer rule */ +} + +static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size) +{ + struct nft_rule_blob *blob; + + /* size must include room for the last rule */ + if (size < offsetof(struct nft_rule_dp, data)) + return NULL; + + size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old); + if (size > INT_MAX) return NULL; - alloc += 1; /* NULL, ends rules */ - if (sizeof(struct nft_rule *) > INT_MAX / alloc) + blob = kvmalloc(size, GFP_KERNEL); + if (!blob) return NULL; - alloc *= sizeof(struct nft_rule *); - alloc += sizeof(struct nft_rules_old); + blob->size = 0; + nft_last_rule(blob, blob->data); - return kvmalloc(alloc, GFP_KERNEL); + return blob; } static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family, @@ -2091,9 +2107,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats; struct net *net = ctx->net; char name[NFT_NAME_MAXLEN]; + struct nft_rule_blob *blob; struct nft_trans *trans; struct nft_chain *chain; - struct nft_rule **rules; + unsigned int data_size; int err; if (table->use == UINT_MAX) @@ -2178,15 +2195,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); } - rules = nf_tables_chain_alloc_rules(chain, 0); - if (!rules) { + data_size = offsetof(struct nft_rule_dp, data); /* last rule */ + blob = nf_tables_chain_alloc_rules(data_size); + if (!blob) { err = -ENOMEM; goto err_destroy_chain; } - *rules = NULL; - rcu_assign_pointer(chain->rules_gen_0, rules); - rcu_assign_pointer(chain->rules_gen_1, rules); + RCU_INIT_POINTER(chain->blob_gen_0, blob); + RCU_INIT_POINTER(chain->blob_gen_1, blob); err = nf_tables_register_hook(net, table, chain); if (err < 0) @@ -8241,32 +8258,83 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { + const struct nft_expr *expr, *last; + struct nft_regs_track track = {}; + unsigned int size, data_size; + void *data, *data_boundary; + struct nft_rule_dp *prule; struct nft_rule *rule; - unsigned int alloc = 0; int i; /* already handled or inactive chain? 
*/ - if (chain->rules_next || !nft_is_active_next(net, chain)) + if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; rule = list_entry(&chain->rules, struct nft_rule, list); i = 0; list_for_each_entry_continue(rule, &chain->rules, list) { - if (nft_is_active_next(net, rule)) - alloc++; + if (nft_is_active_next(net, rule)) { + data_size += sizeof(*prule) + rule->dlen; + if (data_size > INT_MAX) + return -ENOMEM; + } } + data_size += offsetof(struct nft_rule_dp, data); /* last rule */ - chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc); - if (!chain->rules_next) + chain->blob_next = nf_tables_chain_alloc_rules(data_size); + if (!chain->blob_next) return -ENOMEM; + data = (void *)chain->blob_next->data; + data_boundary = data + data_size; + size = 0; + list_for_each_entry_continue(rule, &chain->rules, list) { - if (nft_is_active_next(net, rule)) - chain->rules_next[i++] = rule; + if (!nft_is_active_next(net, rule)) + continue; + + prule = (struct nft_rule_dp *)data; + data += offsetof(struct nft_rule_dp, data); + if (WARN_ON_ONCE(data > data_boundary)) + return -ENOMEM; + + size = 0; + track.last = last; + nft_rule_for_each_expr(expr, last, rule) { + track.cur = expr; + + if (expr->ops->reduce && + expr->ops->reduce(&track, expr)) { + expr = track.cur; + continue; + } + + if (WARN_ON_ONCE(data + expr->ops->size > data_boundary)) + return -ENOMEM; + + memcpy(data + size, expr, expr->ops->size); + size += expr->ops->size; + } + if (WARN_ON_ONCE(size >= 1 << 12)) + return -ENOMEM; + + prule->handle = rule->handle; + prule->dlen = size; + prule->is_last = 0; + + data += size; + size = 0; + chain->blob_next->size += (unsigned long)(data - (void *)prule); } - chain->rules_next[i] = NULL; + prule = (struct nft_rule_dp *)data; + data += offsetof(struct nft_rule_dp, data); + if (WARN_ON_ONCE(data > data_boundary)) + return -ENOMEM; + + nft_last_rule(chain->blob_next, prule); + return 0; } @@ -8280,8 +8348,8 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - kvfree(chain->rules_next); - chain->rules_next = NULL; + kvfree(chain->blob_next); + chain->blob_next = NULL; } } } @@ -8290,38 +8358,34 @@ static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h) { struct nft_rules_old *o = container_of(h, struct nft_rules_old, h); - kvfree(o->start); + kvfree(o->blob); } -static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules) +static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob) { - struct nft_rule **r = rules; struct nft_rules_old *old; - while (*r) - r++; - - r++; /* rcu_head is after end marker */ - old = (void *) r; - old->start = rules; + /* rcu_head is after end marker */ + old = (void *)blob + sizeof(*blob) + blob->size; + old->blob = blob; call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); } static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) { - struct nft_rule **g0, **g1; + struct nft_rule_blob *g0, *g1; bool next_genbit; next_genbit = nft_gencursor_next(net); - g0 = rcu_dereference_protected(chain->rules_gen_0, + g0 = rcu_dereference_protected(chain->blob_gen_0, lockdep_commit_lock_is_held(net)); - g1 = rcu_dereference_protected(chain->rules_gen_1, + g1 = rcu_dereference_protected(chain->blob_gen_1, lockdep_commit_lock_is_held(net)); /* No changes to this chain? 
*/ - if (chain->rules_next == NULL) { + if (chain->blob_next == NULL) { /* chain had no change in last or next generation */ if (g0 == g1) return; @@ -8330,10 +8394,10 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) * one uses same rules as current generation. */ if (next_genbit) { - rcu_assign_pointer(chain->rules_gen_1, g0); + rcu_assign_pointer(chain->blob_gen_1, g0); nf_tables_commit_chain_free_rules_old(g1); } else { - rcu_assign_pointer(chain->rules_gen_0, g1); + rcu_assign_pointer(chain->blob_gen_0, g1); nf_tables_commit_chain_free_rules_old(g0); } @@ -8341,11 +8405,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) } if (next_genbit) - rcu_assign_pointer(chain->rules_gen_1, chain->rules_next); + rcu_assign_pointer(chain->blob_gen_1, chain->blob_next); else - rcu_assign_pointer(chain->rules_gen_0, chain->rules_next); + rcu_assign_pointer(chain->blob_gen_0, chain->blob_next); - chain->rules_next = NULL; + chain->blob_next = NULL; if (g0 == g1) return; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index adc348056076..36e73f9828c5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -38,7 +38,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - const struct nft_rule *rule, + const struct nft_rule_dp *rule, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { @@ -67,6 +67,36 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +static noinline void __nft_trace_verdict(struct nft_traceinfo *info, + const struct nft_chain *chain, + const struct nft_regs *regs) +{ + enum nft_trace_types type; + + switch (regs->verdict.code) { + case NFT_CONTINUE: + case NFT_RETURN: + type = NFT_TRACETYPE_RETURN; + break; + default: + type = NFT_TRACETYPE_RULE; + break; + } + + __nft_trace_packet(info, chain, type); +} + +static inline void nft_trace_verdict(struct nft_traceinfo *info, + const struct nft_chain *chain, + const struct nft_rule_dp *rule, + const struct nft_regs *regs) +{ + if (static_branch_unlikely(&nft_trace_enabled)) { + info->rule = rule; + __nft_trace_verdict(info, chain, regs); + } +} + static bool nft_payload_fast_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -110,7 +140,6 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, base_chain = nft_base_chain(chain); - rcu_read_lock(); pstats = READ_ONCE(base_chain->stats); if (pstats) { local_bh_disable(); @@ -121,12 +150,12 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, u64_stats_update_end(&stats->syncp); local_bh_enable(); } - rcu_read_unlock(); } struct nft_jumpstack { - const struct nft_chain *chain; - struct nft_rule *const *rules; + const struct nft_chain *chain; + const struct nft_rule_dp *rule; + const struct nft_rule_dp *last_rule; }; static void expr_call_ops_eval(const struct nft_expr *expr, @@ -141,6 +170,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr, X(e, nft_payload_eval); X(e, nft_cmp_eval); + X(e, nft_counter_eval); X(e, nft_meta_get_eval); X(e, nft_lookup_eval); X(e, nft_range_eval); @@ -154,18 +184,28 @@ static void expr_call_ops_eval(const struct nft_expr *expr, expr->ops->eval(expr, regs, pkt); } +#define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0] +#define nft_rule_expr_next(expr) 
((void *)expr) + expr->ops->size +#define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen] +#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen + +#define nft_rule_dp_for_each_expr(expr, last, rule) \ + for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \ + (expr) != (last); \ + (expr) = nft_rule_expr_next(expr)) + unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; + const struct nft_rule_dp *rule, *last_rule; const struct net *net = nft_net(pkt); - struct nft_rule *const *rules; - const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); + struct nft_rule_blob *blob; struct nft_traceinfo info; info.trace = false; @@ -173,16 +213,16 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: if (genbit) - rules = rcu_dereference(chain->rules_gen_1); + blob = rcu_dereference(chain->blob_gen_1); else - rules = rcu_dereference(chain->rules_gen_0); + blob = rcu_dereference(chain->blob_gen_0); + rule = (struct nft_rule_dp *)blob->data; + last_rule = (void *)blob->data + blob->size; next_rule: - rule = *rules; regs.verdict.code = NFT_CONTINUE; - for (; *rules ; rules++) { - rule = *rules; - nft_rule_for_each_expr(expr, last, rule) { + for (; rule < last_rule; rule = nft_rule_next(rule)) { + nft_rule_dp_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, ®s); else if (expr->ops == &nft_bitwise_fast_ops) @@ -207,13 +247,13 @@ next_rule: break; } + nft_trace_verdict(&info, chain, rule, ®s); + switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: case NF_STOLEN: - nft_trace_packet(&info, chain, rule, - NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -222,28 +262,25 @@ next_rule: if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE)) return NF_DROP; jumpstack[stackptr].chain = chain; - jumpstack[stackptr].rules = rules + 1; + jumpstack[stackptr].rule = nft_rule_next(rule); + jumpstack[stackptr].last_rule = last_rule; stackptr++; fallthrough; case NFT_GOTO: - nft_trace_packet(&info, chain, rule, - NFT_TRACETYPE_RULE); - chain = regs.verdict.chain; goto do_chain; case NFT_CONTINUE: case NFT_RETURN: - nft_trace_packet(&info, chain, rule, - NFT_TRACETYPE_RETURN); break; default: - WARN_ON(1); + WARN_ON_ONCE(1); } if (stackptr > 0) { stackptr--; chain = jumpstack[stackptr].chain; - rules = jumpstack[stackptr].rules; + rule = jumpstack[stackptr].rule; + last_rule = jumpstack[stackptr].last_rule; goto next_rule; } @@ -269,18 +306,22 @@ static struct nft_expr_type *nft_basic_types[] = { &nft_rt_type, &nft_exthdr_type, &nft_last_type, + &nft_counter_type, }; static struct nft_object_type *nft_basic_objects[] = { #ifdef CONFIG_NETWORK_SECMARK &nft_secmark_obj_type, #endif + &nft_counter_obj_type, }; int __init nf_tables_core_module_init(void) { int err, i, j = 0; + nft_counter_init_seqcount(); + for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) { err = nft_register_obj(nft_basic_objects[i]); if (err) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 84a7dea46efa..5041725423c2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -142,7 +142,7 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, static int 
nf_trace_fill_rule_info(struct sk_buff *nlskb, const struct nft_traceinfo *info) { - if (!info->rule) + if (!info->rule || info->rule->is_last) return 0; /* a continue verdict with ->type == RETURN means that this is diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7f83f9697fc1..ae9c0756bba5 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -66,6 +66,7 @@ struct nfulnl_instance { struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct net *net; + netns_tracker ns_tracker; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ u32 peer_portid; /* PORTID of the peer process */ @@ -140,7 +141,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head) struct nfulnl_instance *inst = container_of(head, struct nfulnl_instance, rcu); - put_net(inst->net); + put_net_track(inst->net, &inst->ns_tracker); kfree(inst); module_put(THIS_MODULE); } @@ -187,7 +188,7 @@ instance_create(struct net *net, u_int16_t group_num, timer_setup(&inst->timer, nfulnl_timer, 0); - inst->net = get_net(net); + inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC); inst->peer_user_ns = user_ns; inst->peer_portid = portid; inst->group_num = group_num; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 44c3de176d18..ea2d9c2a44cf 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -225,7 +225,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_ct_hook *ct_hook; + const struct nf_ct_hook *ct_hook; int err; if (verdict == NF_ACCEPT || @@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info ctinfo = 0; - struct nfnl_ct_hook *nfnl_ct; + const struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; u32 seclen = 0; @@ -1104,7 +1104,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb, return 0; } -static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, +static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct, const struct nlmsghdr *nlh, const struct nlattr * const nfqa[], struct nf_queue_entry *entry, @@ -1171,11 +1171,11 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, { struct nfnl_queue_net *q = nfnl_queue_pernet(info->net); u_int16_t queue_num = ntohs(info->nfmsg->res_id); + const struct nfnl_ct_hook *nfnl_ct; struct nfqnl_msg_verdict_hdr *vhdr; enum ip_conntrack_info ctinfo; struct nfqnl_instance *queue; struct nf_queue_entry *entry; - struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; unsigned int verdict; int err; diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 47b0dba95054..7b727d3ebf9d 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -278,12 +278,52 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, return 0; } +static bool nft_bitwise_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + const struct nft_bitwise *bitwise; + + if (!track->regs[priv->sreg].selector) + return false; + + bitwise = nft_expr_priv(expr); + if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && + track->regs[priv->dreg].bitwise && + track->regs[priv->dreg].bitwise->ops == expr->ops && 
+ priv->sreg == bitwise->sreg && + priv->dreg == bitwise->dreg && + priv->op == bitwise->op && + priv->len == bitwise->len && + !memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) && + !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) && + !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) { + track->cur = expr; + return true; + } + + if (track->regs[priv->sreg].bitwise) { + track->regs[priv->dreg].selector = NULL; + track->regs[priv->dreg].bitwise = NULL; + return false; + } + + if (priv->sreg != priv->dreg) { + track->regs[priv->dreg].selector = + track->regs[priv->sreg].selector; + } + track->regs[priv->dreg].bitwise = expr; + + return false; +} + static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, + .reduce = nft_bitwise_reduce, .offload = nft_bitwise_offload, }; @@ -385,12 +425,49 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx, return 0; } +static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + const struct nft_bitwise_fast_expr *bitwise; + + if (!track->regs[priv->sreg].selector) + return false; + + bitwise = nft_expr_priv(expr); + if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && + track->regs[priv->dreg].bitwise && + track->regs[priv->dreg].bitwise->ops == expr->ops && + priv->sreg == bitwise->sreg && + priv->dreg == bitwise->dreg && + priv->mask == bitwise->mask && + priv->xor == bitwise->xor) { + track->cur = expr; + return true; + } + + if (track->regs[priv->sreg].bitwise) { + track->regs[priv->dreg].selector = NULL; + track->regs[priv->dreg].bitwise = NULL; + return false; + } + + if (priv->sreg != priv->dreg) { + track->regs[priv->dreg].selector = + track->regs[priv->sreg].selector; + } + track->regs[priv->dreg].bitwise = expr; + + return false; +} + const struct nft_expr_ops nft_bitwise_fast_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)), .eval = NULL, /* inlined */ .init = nft_bitwise_fast_init, .dump = nft_bitwise_fast_dump, + .reduce = nft_bitwise_fast_reduce, .offload = nft_bitwise_fast_offload, }; @@ -427,3 +504,21 @@ struct nft_expr_type nft_bitwise_type __read_mostly = { .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; + +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + const struct nft_expr *last = track->last; + const struct nft_expr *next; + + if (expr == last) + return false; + + next = nft_expr_next(expr); + if (next->ops == &nft_bitwise_ops) + return nft_bitwise_reduce(track, next); + else if (next->ops == &nft_bitwise_fast_ops) + return nft_bitwise_fast_reduce(track, next); + + return false; +} diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 7d0761fad37e..58dcafe8bf79 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -14,7 +14,7 @@ #include <net/netfilter/nf_conntrack_zones.h> struct nft_connlimit { - struct nf_conncount_list list; + struct nf_conncount_list *list; u32 limit; bool invert; }; @@ -43,12 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) { + if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) { regs->verdict.code = NF_DROP; return; } - count = 
priv->list.count; + count = priv->list->count; if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; @@ -76,7 +76,11 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, invert = true; } - nf_conncount_list_init(&priv->list); + priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL); + if (!priv->list) + return -ENOMEM; + + nf_conncount_list_init(priv->list); priv->limit = limit; priv->invert = invert; @@ -87,7 +91,8 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, struct nft_connlimit *priv) { nf_ct_netns_put(ctx->net, ctx->family); - nf_conncount_cache_free(&priv->list); + nf_conncount_cache_free(priv->list); + kfree(priv->list); } static int nft_connlimit_do_dump(struct sk_buff *skb, @@ -200,7 +205,11 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) struct nft_connlimit *priv_dst = nft_expr_priv(dst); struct nft_connlimit *priv_src = nft_expr_priv(src); - nf_conncount_list_init(&priv_dst->list); + priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC); + if (priv_dst->list) + return -ENOMEM; + + nf_conncount_list_init(priv_dst->list); priv_dst->limit = priv_src->limit; priv_dst->invert = priv_src->invert; @@ -212,7 +221,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx, { struct nft_connlimit *priv = nft_expr_priv(expr); - nf_conncount_cache_free(&priv->list); + nf_conncount_cache_free(priv->list); + kfree(priv->list); } static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) @@ -221,7 +231,7 @@ static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) bool ret; local_bh_disable(); - ret = nf_conncount_gc_list(net, &priv->list); + ret = nf_conncount_gc_list(net, priv->list); local_bh_enable(); return ret; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 8edd3b3c173d..f179e8c3b0ca 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -13,6 +13,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_offload.h> struct nft_counter { @@ -174,7 +175,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; -static struct nft_object_type nft_counter_obj_type; +struct nft_object_type nft_counter_obj_type; static const struct nft_object_ops nft_counter_obj_ops = { .type = &nft_counter_obj_type, .size = sizeof(struct nft_counter_percpu_priv), @@ -184,7 +185,7 @@ static const struct nft_object_ops nft_counter_obj_ops = { .dump = nft_counter_obj_dump, }; -static struct nft_object_type nft_counter_obj_type __read_mostly = { +struct nft_object_type nft_counter_obj_type __read_mostly = { .type = NFT_OBJECT_COUNTER, .ops = &nft_counter_obj_ops, .maxattr = NFTA_COUNTER_MAX, @@ -192,9 +193,8 @@ static struct nft_object_type nft_counter_obj_type __read_mostly = { .owner = THIS_MODULE, }; -static void nft_counter_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); @@ -275,7 +275,15 @@ static void nft_counter_offload_stats(struct nft_expr *expr, preempt_enable(); } -static struct nft_expr_type nft_counter_type; +void nft_counter_init_seqcount(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + 
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); +} + +struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)), @@ -289,7 +297,7 @@ static const struct nft_expr_ops nft_counter_ops = { .offload_stats = nft_counter_offload_stats, }; -static struct nft_expr_type nft_counter_type __read_mostly = { +struct nft_expr_type nft_counter_type __read_mostly = { .name = "counter", .ops = &nft_counter_ops, .policy = nft_counter_policy, @@ -297,39 +305,3 @@ static struct nft_expr_type nft_counter_type __read_mostly = { .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; - -static int __init nft_counter_module_init(void) -{ - int cpu, err; - - for_each_possible_cpu(cpu) - seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); - - err = nft_register_obj(&nft_counter_obj_type); - if (err < 0) - return err; - - err = nft_register_expr(&nft_counter_type); - if (err < 0) - goto err1; - - return 0; -err1: - nft_unregister_obj(&nft_counter_obj_type); - return err; -} - -static void __exit nft_counter_module_exit(void) -{ - nft_unregister_expr(&nft_counter_type); - nft_unregister_obj(&nft_counter_obj_type); -} - -module_init(nft_counter_module_init); -module_exit(nft_counter_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("counter"); -MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER); -MODULE_DESCRIPTION("nftables counter rule support"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 99b1de14ff7e..518d96c8c247 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -259,7 +259,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(atomic_read(&ct->ct_general.use) == 1)) { + if (likely(refcount_read(&ct->ct_general.use) == 1)) { nf_ct_zone_add(ct, &zone); } else { /* previous skb got queued to userspace */ @@ -270,7 +270,6 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, } } - atomic_inc(&ct->ct_general.use); nf_ct_set(skb, ct, IP_CT_NEW); } #endif @@ -375,7 +374,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void) return false; } - atomic_set(&tmp->ct_general.use, 1); per_cpu(nft_ct_pcpu_template, cpu) = tmp; } diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 304e33cbed9b..5ee33d0ccd4e 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -8,9 +8,13 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +struct nft_last { + unsigned long jiffies; + unsigned int set; +}; + struct nft_last_priv { - unsigned long last_jiffies; - unsigned int last_set; + struct nft_last *last; }; static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = { @@ -22,47 +26,55 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_last_priv *priv = nft_expr_priv(expr); + struct nft_last *last; u64 last_jiffies; - u32 last_set = 0; int err; - if (tb[NFTA_LAST_SET]) { - last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); - if (last_set == 1) - priv->last_set = 1; - } + last = kzalloc(sizeof(*last), GFP_KERNEL); + if (!last) + return -ENOMEM; + + if (tb[NFTA_LAST_SET]) + last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); - if (last_set && tb[NFTA_LAST_MSECS]) { + if (last->set && tb[NFTA_LAST_MSECS]) { err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies); if (err < 0) - return err; + goto err; 
- priv->last_jiffies = jiffies - (unsigned long)last_jiffies; + last->jiffies = jiffies - (unsigned long)last_jiffies; } + priv->last = last; return 0; +err: + kfree(last); + + return err; } static void nft_last_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_last_priv *priv = nft_expr_priv(expr); + struct nft_last *last = priv->last; - if (READ_ONCE(priv->last_jiffies) != jiffies) - WRITE_ONCE(priv->last_jiffies, jiffies); - if (READ_ONCE(priv->last_set) == 0) - WRITE_ONCE(priv->last_set, 1); + if (READ_ONCE(last->jiffies) != jiffies) + WRITE_ONCE(last->jiffies, jiffies); + if (READ_ONCE(last->set) == 0) + WRITE_ONCE(last->set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); - unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); - u32 last_set = READ_ONCE(priv->last_set); + struct nft_last *last = priv->last; + unsigned long last_jiffies = READ_ONCE(last->jiffies); + u32 last_set = READ_ONCE(last->set); __be64 msecs; if (time_before(jiffies, last_jiffies)) { - WRITE_ONCE(priv->last_set, 0); + WRITE_ONCE(last->set, 0); last_set = 0; } @@ -81,11 +93,32 @@ nla_put_failure: return -1; } +static void nft_last_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_last_priv *priv = nft_expr_priv(expr); + + kfree(priv->last); +} + +static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) +{ + struct nft_last_priv *priv_dst = nft_expr_priv(dst); + + priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); + if (priv_dst->last) + return -ENOMEM; + + return 0; +} + static const struct nft_expr_ops nft_last_ops = { .type = &nft_last_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)), .eval = nft_last_eval, .init = nft_last_init, + .destroy = nft_last_destroy, + .clone = nft_last_clone, .dump = nft_last_dump, }; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 82ec27bdf941..f04be5be73a0 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -18,6 +18,10 @@ struct nft_limit { spinlock_t lock; u64 last; u64 tokens; +}; + +struct nft_limit_priv { + struct nft_limit *limit; u64 tokens_max; u64 rate; u64 nsecs; @@ -25,33 +29,33 @@ struct nft_limit { bool invert; }; -static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) +static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) { u64 now, tokens; s64 delta; - spin_lock_bh(&limit->lock); + spin_lock_bh(&priv->limit->lock); now = ktime_get_ns(); - tokens = limit->tokens + now - limit->last; - if (tokens > limit->tokens_max) - tokens = limit->tokens_max; + tokens = priv->limit->tokens + now - priv->limit->last; + if (tokens > priv->tokens_max) + tokens = priv->tokens_max; - limit->last = now; + priv->limit->last = now; delta = tokens - cost; if (delta >= 0) { - limit->tokens = delta; - spin_unlock_bh(&limit->lock); - return limit->invert; + priv->limit->tokens = delta; + spin_unlock_bh(&priv->limit->lock); + return priv->invert; } - limit->tokens = tokens; - spin_unlock_bh(&limit->lock); - return !limit->invert; + priv->limit->tokens = tokens; + spin_unlock_bh(&priv->limit->lock); + return !priv->invert; } /* Use same default as in iptables. 
*/ #define NFT_LIMIT_PKT_BURST_DEFAULT 5 -static int nft_limit_init(struct nft_limit *limit, +static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { u64 unit, tokens; @@ -60,58 +64,62 @@ static int nft_limit_init(struct nft_limit *limit, tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; - limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - limit->nsecs = unit * NSEC_PER_SEC; - if (limit->rate == 0 || limit->nsecs < unit) + priv->nsecs = unit * NSEC_PER_SEC; + if (priv->rate == 0 || priv->nsecs < unit) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) - limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + priv->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); - if (pkts && limit->burst == 0) - limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT; + if (pkts && priv->burst == 0) + priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; - if (limit->rate + limit->burst < limit->rate) + if (priv->rate + priv->burst < priv->rate) return -EOVERFLOW; if (pkts) { - tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst; + tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst; } else { /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst), - limit->rate); + tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst), + priv->rate); } - limit->tokens = tokens; - limit->tokens_max = limit->tokens; + priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL); + if (!priv->limit) + return -ENOMEM; + + priv->limit->tokens = tokens; + priv->tokens_max = priv->limit->tokens; if (tb[NFTA_LIMIT_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); if (flags & NFT_LIMIT_F_INV) - limit->invert = true; + priv->invert = true; } - limit->last = ktime_get_ns(); - spin_lock_init(&limit->lock); + priv->limit->last = ktime_get_ns(); + spin_lock_init(&priv->limit->lock); return 0; } -static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, +static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv, enum nft_limit_type type) { - u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0; - u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); + u32 flags = priv->invert ? 
@@ -121,8 +129,34 @@ nla_put_failure:
 	return -1;
 }
 
-struct nft_limit_pkts {
-	struct nft_limit	limit;
+static void nft_limit_destroy(const struct nft_ctx *ctx,
+			      const struct nft_limit_priv *priv)
+{
+	kfree(priv->limit);
+}
+
+static int nft_limit_clone(struct nft_limit_priv *priv_dst,
+			   const struct nft_limit_priv *priv_src)
+{
+	priv_dst->tokens_max = priv_src->tokens_max;
+	priv_dst->rate = priv_src->rate;
+	priv_dst->nsecs = priv_src->nsecs;
+	priv_dst->burst = priv_src->burst;
+	priv_dst->invert = priv_src->invert;
+
+	priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+	if (!priv_dst->limit)
+		return -ENOMEM;
+
+	spin_lock_init(&priv_dst->limit->lock);
+	priv_dst->limit->tokens = priv_src->tokens_max;
+	priv_dst->limit->last = ktime_get_ns();
+
+	return 0;
+}
+
+struct nft_limit_priv_pkts {
+	struct nft_limit_priv	limit;
 	u64			cost;
 };
 
@@ -130,7 +164,7 @@ static void nft_limit_pkts_eval(const struct nft_expr *expr,
 				struct nft_regs *regs,
 				const struct nft_pktinfo *pkt)
 {
-	struct nft_limit_pkts *priv = nft_expr_priv(expr);
+	struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
 
 	if (nft_limit_eval(&priv->limit, priv->cost))
 		regs->verdict.code = NFT_BREAK;
@@ -148,7 +182,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr,
 			       const struct nlattr * const tb[])
 {
-	struct nft_limit_pkts *priv = nft_expr_priv(expr);
+	struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
 	int err;
 
 	err = nft_limit_init(&priv->limit, tb, true);
@@ -161,17 +195,35 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
 static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
-	const struct nft_limit_pkts *priv = nft_expr_priv(expr);
+	const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
 
 	return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
 }
 
+static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
+				   const struct nft_expr *expr)
+{
+	const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+
+	nft_limit_destroy(ctx, &priv->limit);
+}
+
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
+	struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+
+	return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+}
+
 static struct nft_expr_type nft_limit_type;
 static const struct nft_expr_ops nft_limit_pkts_ops = {
 	.type		= &nft_limit_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
 	.eval		= nft_limit_pkts_eval,
 	.init		= nft_limit_pkts_init,
+	.destroy	= nft_limit_pkts_destroy,
+	.clone		= nft_limit_pkts_clone,
 	.dump		= nft_limit_pkts_dump,
 };
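Note that nft_limit_clone above deliberately starts the copy with a full bucket (tokens = tokens_max) and a fresh timestamp, under GFP_ATOMIC since clones can be made from the packet path. To make the sizing concrete, here is a small worked example of the two cost/bucket formulas from nft_limit_init (the numbers are arbitrary, picked only for illustration):

/* Worked example (illustrative) of how init sizes the bucket:
 * pkts:  cost = nsecs/rate,          bucket = cost * burst
 * bytes: cost = nsecs*len/rate,      bucket = nsecs*(rate+burst)/rate */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t nsecs = 1000000000ull;	/* unit = 1 second */
	uint64_t rate = 10, burst = 5;	/* 10 per second, burst 5 */

	/* packet mode */
	uint64_t pkt_cost = nsecs / rate;
	uint64_t pkt_bucket = pkt_cost * burst;

	/* byte mode, charging a 1500-byte packet */
	uint64_t byte_cost = nsecs * 1500 / rate;
	uint64_t byte_bucket = nsecs * (rate + burst) / rate;

	printf("pkt:  cost=%" PRIu64 " bucket=%" PRIu64 "\n", pkt_cost, pkt_bucket);
	printf("byte: cost=%" PRIu64 " bucket=%" PRIu64 "\n", byte_cost, byte_bucket);
	return 0;
}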
@@ -179,7 +231,7 @@ static void nft_limit_bytes_eval(const struct nft_expr *expr,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt)
 {
-	struct nft_limit *priv = nft_expr_priv(expr);
+	struct nft_limit_priv *priv = nft_expr_priv(expr);
 	u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
 
 	if (nft_limit_eval(priv, cost))
@@ -190,7 +242,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
 				const struct nft_expr *expr,
 				const struct nlattr * const tb[])
 {
-	struct nft_limit *priv = nft_expr_priv(expr);
+	struct nft_limit_priv *priv = nft_expr_priv(expr);
 
 	return nft_limit_init(priv, tb, false);
 }
@@ -198,17 +250,35 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
 static int nft_limit_bytes_dump(struct sk_buff *skb,
 				const struct nft_expr *expr)
 {
-	const struct nft_limit *priv = nft_expr_priv(expr);
+	const struct nft_limit_priv *priv = nft_expr_priv(expr);
 
 	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
 }
 
+static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	const struct nft_limit_priv *priv = nft_expr_priv(expr);
+
+	nft_limit_destroy(ctx, priv);
+}
+
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
+	struct nft_limit_priv *priv_src = nft_expr_priv(src);
+
+	return nft_limit_clone(priv_dst, priv_src);
+}
+
 static const struct nft_expr_ops nft_limit_bytes_ops = {
 	.type		= &nft_limit_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)),
 	.eval		= nft_limit_bytes_eval,
 	.init		= nft_limit_bytes_init,
 	.dump		= nft_limit_bytes_dump,
+	.clone		= nft_limit_bytes_clone,
+	.destroy	= nft_limit_bytes_destroy,
 };
 
 static const struct nft_expr_ops *
@@ -240,7 +310,7 @@ static void nft_limit_obj_pkts_eval(struct nft_object *obj,
 				    struct nft_regs *regs,
 				    const struct nft_pktinfo *pkt)
 {
-	struct nft_limit_pkts *priv = nft_obj_data(obj);
+	struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
 
 	if (nft_limit_eval(&priv->limit, priv->cost))
 		regs->verdict.code = NFT_BREAK;
@@ -250,7 +320,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
 				   const struct nlattr * const tb[],
 				   struct nft_object *obj)
 {
-	struct nft_limit_pkts *priv = nft_obj_data(obj);
+	struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
 	int err;
 
 	err = nft_limit_init(&priv->limit, tb, true);
@@ -265,7 +335,7 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
 				   struct nft_object *obj,
 				   bool reset)
 {
-	const struct nft_limit_pkts *priv = nft_obj_data(obj);
+	const struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
 
 	return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
 }
@@ -273,7 +343,7 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
 static struct nft_object_type nft_limit_obj_type;
 static const struct nft_object_ops nft_limit_obj_pkts_ops = {
 	.type		= &nft_limit_obj_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
 	.init		= nft_limit_obj_pkts_init,
 	.eval		= nft_limit_obj_pkts_eval,
 	.dump		= nft_limit_obj_pkts_dump,
@@ -283,7 +353,7 @@ static void nft_limit_obj_bytes_eval(struct nft_object *obj,
 				     struct nft_regs *regs,
 				     const struct nft_pktinfo *pkt)
 {
-	struct nft_limit *priv = nft_obj_data(obj);
+	struct nft_limit_priv *priv = nft_obj_data(obj);
 	u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
 
 	if (nft_limit_eval(priv, cost))
@@ -294,7 +364,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
 				    const struct nlattr * const tb[],
 				    struct nft_object *obj)
 {
-	struct nft_limit *priv = nft_obj_data(obj);
+	struct nft_limit_priv *priv = nft_obj_data(obj);
 
 	return nft_limit_init(priv, tb, false);
 }
@@ -303,7 +373,7 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
 				    struct nft_object *obj,
 				    bool reset)
 {
-	const struct nft_limit *priv = nft_obj_data(obj);
+	const struct nft_limit_priv *priv = nft_obj_data(obj);
 
 	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
 }
@@ -311,7 +381,7 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
 static struct nft_object_type nft_limit_obj_type;
 static const struct nft_object_ops nft_limit_obj_bytes_ops = {
 	.type		= &nft_limit_obj_type,
-	.size		= sizeof(struct nft_limit),
+	.size		= sizeof(struct nft_limit_priv),
 	.init		= nft_limit_obj_bytes_init,
 	.eval		= nft_limit_obj_bytes_eval,
 	.dump		= nft_limit_obj_bytes_dump,
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index fe91ff5f8fbe..5ab4df56c945 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -750,16 +750,63 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
 	return 0;
 }
 
+static bool nft_meta_get_reduce(struct nft_regs_track *track,
+				const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct nft_meta *meta;
+
+	if (!track->regs[priv->dreg].selector ||
+	    track->regs[priv->dreg].selector->ops != expr->ops) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	meta = nft_expr_priv(track->regs[priv->dreg].selector);
+	if (priv->key != meta->key ||
+	    priv->dreg != meta->dreg) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	if (!track->regs[priv->dreg].bitwise)
+		return true;
+
+	return nft_expr_reduce_bitwise(track, expr);
+}
+
 static const struct nft_expr_ops nft_meta_get_ops = {
 	.type		= &nft_meta_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
 	.eval		= nft_meta_get_eval,
 	.init		= nft_meta_get_init,
 	.dump		= nft_meta_get_dump,
+	.reduce		= nft_meta_get_reduce,
 	.validate	= nft_meta_get_validate,
 	.offload	= nft_meta_get_offload,
 };
 
+static bool nft_meta_set_reduce(struct nft_regs_track *track,
+				const struct nft_expr *expr)
+{
+	int i;
+
+	for (i = 0; i < NFT_REG32_NUM; i++) {
+		if (!track->regs[i].selector)
+			continue;
+
+		if (track->regs[i].selector->ops != &nft_meta_get_ops)
+			continue;
+
+		track->regs[i].selector = NULL;
+		track->regs[i].bitwise = NULL;
+	}
+
+	return false;
+}
+
 static const struct nft_expr_ops nft_meta_set_ops = {
 	.type		= &nft_meta_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@@ -767,6 +814,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
 	.init		= nft_meta_set_init,
 	.destroy	= nft_meta_set_destroy,
 	.dump		= nft_meta_set_dump,
+	.reduce		= nft_meta_set_reduce,
 	.validate	= nft_meta_set_validate,
 };
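These .reduce callbacks are the consumers of the new register-tracking infrastructure: a get expression records itself as the selector that last filled its destination register and reports whether an identical, unmodified store is already there (in which case the store can be elided), while a set expression, which mutates the packet, must invalidate any registers whose tracked selector it may have made stale. A simplified userspace sketch of the tracking-table logic (the struct layout and names here are illustrative, not the kernel's):

/* Illustrative sketch of the register-tracking idea: a store can be
 * skipped when the same selector already filled the register and no
 * bitwise operation has touched it since. */
#include <stdbool.h>
#include <stdio.h>

#define REG32_NUM 20

static const char meta_ops;	/* stand-in identity for one expr kind */

struct expr {
	const void *ops;	/* identity of the expression kind */
	int key, dreg;		/* what it loads and where */
};

struct track_entry {
	const struct expr *selector;	/* last store into this register */
	const struct expr *bitwise;	/* pending bitwise transform, if any */
};

static bool reduce_get(struct track_entry *track, const struct expr *e)
{
	struct track_entry *t = &track[e->dreg];

	if (!t->selector || t->selector->ops != e->ops ||
	    t->selector->key != e->key || t->selector->dreg != e->dreg) {
		t->selector = e;	/* remember this store */
		t->bitwise = NULL;
		return false;		/* cannot elide it */
	}
	return t->bitwise == NULL;	/* identical untouched store: elide */
}

int main(void)
{
	struct track_entry track[REG32_NUM] = { { 0 } };
	struct expr a = { &meta_ops, 1, 4 }, b = { &meta_ops, 1, 4 };

	printf("first store elided? %d\n", reduce_get(track, &a));	/* 0 */
	printf("second store elided? %d\n", reduce_get(track, &b));	/* 1 */
	return 0;
}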
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 722cac1e90e0..1d378efd8823 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
 struct nft_ng_inc {
 	u8	dreg;
 	u32	modulus;
-	atomic_t	counter;
+	atomic_t	*counter;
 	u32	offset;
 };
 
@@ -27,9 +27,9 @@ static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
 	u32 nval, oval;
 
 	do {
-		oval = atomic_read(&priv->counter);
+		oval = atomic_read(priv->counter);
 		nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
-	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+	} while (atomic_cmpxchg(priv->counter, oval, nval) != oval);
 
 	return nval + priv->offset;
 }
@@ -55,6 +55,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_ng_inc *priv = nft_expr_priv(expr);
+	int err;
 
 	if (tb[NFTA_NG_OFFSET])
 		priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
@@ -66,10 +67,22 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 	if (priv->offset + priv->modulus - 1 < priv->offset)
 		return -EOVERFLOW;
 
-	atomic_set(&priv->counter, priv->modulus - 1);
+	priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
+	if (!priv->counter)
+		return -ENOMEM;
 
-	return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
-					NULL, NFT_DATA_VALUE, sizeof(u32));
+	atomic_set(priv->counter, priv->modulus - 1);
+
+	err = nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+				       NULL, NFT_DATA_VALUE, sizeof(u32));
+	if (err < 0)
+		goto err;
+
+	return 0;
+err:
+	kfree(priv->counter);
+
+	return err;
 }
 
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
@@ -98,6 +111,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
 			   priv->offset);
 }
 
+static void nft_ng_inc_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+	kfree(priv->counter);
+}
+
 struct nft_ng_random {
 	u8	dreg;
 	u32	modulus;
@@ -157,6 +178,7 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
 	.eval		= nft_ng_inc_eval,
 	.init		= nft_ng_inc_init,
+	.destroy	= nft_ng_inc_destroy,
 	.dump		= nft_ng_inc_dump,
 };
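nft_ng_inc_gen above is a lock-free modular counter: read, compute the next value, and retry the compare-and-swap until no other CPU raced in between. The same loop in portable C11 atomics (an illustrative userspace analogue of the kernel's atomic_t API):

/* Userspace sketch of the lock-free round-robin counter above. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t ng_inc_gen(atomic_uint *counter, uint32_t modulus)
{
	uint32_t oval, nval;

	do {
		oval = atomic_load(counter);
		nval = (oval + 1 < modulus) ? oval + 1 : 0;
		/* retry if another thread advanced the counter meanwhile */
	} while (!atomic_compare_exchange_weak(counter, &oval, nval));

	return nval;
}

int main(void)
{
	atomic_uint counter = 2;	/* modulus - 1, so the first value is 0 */

	for (int i = 0; i < 6; i++)
		printf("%u ", ng_inc_gen(&counter, 3));	/* 0 1 2 0 1 2 */
	printf("\n");
	return 0;
}

Seeding the counter to modulus - 1, as the init path does, makes the very first generated value land on 0.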
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index bd689938a2e0..10f422caa76a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -157,7 +157,8 @@ void nft_payload_eval(const struct nft_expr *expr,
 			goto err;
 		break;
 	default:
-		BUG();
+		WARN_ON_ONCE(1);
+		goto err;
 	}
 	offset += priv->offset;
@@ -209,6 +210,34 @@ nla_put_failure:
 	return -1;
 }
 
+static bool nft_payload_reduce(struct nft_regs_track *track,
+			       const struct nft_expr *expr)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	const struct nft_payload *payload;
+
+	if (!track->regs[priv->dreg].selector ||
+	    track->regs[priv->dreg].selector->ops != expr->ops) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	payload = nft_expr_priv(track->regs[priv->dreg].selector);
+	if (priv->base != payload->base ||
+	    priv->offset != payload->offset ||
+	    priv->len != payload->len) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	if (!track->regs[priv->dreg].bitwise)
+		return true;
+
+	return nft_expr_reduce_bitwise(track, expr);
+}
+
 static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
 				     u32 priv_len, u32 field_len)
 {
@@ -512,6 +541,7 @@ static const struct nft_expr_ops nft_payload_ops = {
 	.eval		= nft_payload_eval,
 	.init		= nft_payload_init,
 	.dump		= nft_payload_dump,
+	.reduce		= nft_payload_reduce,
 	.offload	= nft_payload_offload,
 };
 
@@ -521,6 +551,7 @@ const struct nft_expr_ops nft_payload_fast_ops = {
 	.eval		= nft_payload_eval,
 	.init		= nft_payload_init,
 	.dump		= nft_payload_dump,
+	.reduce		= nft_payload_reduce,
 	.offload	= nft_payload_offload,
 };
 
@@ -664,7 +695,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
 			goto err;
 		break;
 	default:
-		BUG();
+		WARN_ON_ONCE(1);
+		goto err;
 	}
 
 	csum_offset = offset + priv->csum_offset;
@@ -766,12 +798,33 @@ nla_put_failure:
 	return -1;
 }
 
+static bool nft_payload_set_reduce(struct nft_regs_track *track,
+				   const struct nft_expr *expr)
+{
+	int i;
+
+	for (i = 0; i < NFT_REG32_NUM; i++) {
+		if (!track->regs[i].selector)
+			continue;
+
+		if (track->regs[i].selector->ops != &nft_payload_ops &&
+		    track->regs[i].selector->ops != &nft_payload_fast_ops)
+			continue;
+
+		track->regs[i].selector = NULL;
+		track->regs[i].bitwise = NULL;
+	}
+
+	return false;
+}
+
 static const struct nft_expr_ops nft_payload_set_ops = {
 	.type		= &nft_payload_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
 	.eval		= nft_payload_set_eval,
 	.init		= nft_payload_set_init,
 	.dump		= nft_payload_set_dump,
+	.reduce		= nft_payload_set_reduce,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c4d1389f7185..0484aef74273 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -15,13 +15,13 @@
 struct nft_quota {
 	atomic64_t	quota;
 	unsigned long	flags;
-	atomic64_t	consumed;
+	atomic64_t	*consumed;
 };
 
 static inline bool nft_overquota(struct nft_quota *priv,
 				 const struct sk_buff *skb)
 {
-	return atomic64_add_return(skb->len, &priv->consumed) >=
+	return atomic64_add_return(skb->len, priv->consumed) >=
 	       atomic64_read(&priv->quota);
 }
 
@@ -90,13 +90,23 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 		return -EOPNOTSUPP;
 	}
 
+	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+	if (!priv->consumed)
+		return -ENOMEM;
+
 	atomic64_set(&priv->quota, quota);
 	priv->flags = flags;
-	atomic64_set(&priv->consumed, consumed);
+	atomic64_set(priv->consumed, consumed);
 
 	return 0;
 }
 
+static void nft_quota_do_destroy(const struct nft_ctx *ctx,
+				 struct nft_quota *priv)
+{
+	kfree(priv->consumed);
+}
+
 static int nft_quota_obj_init(const struct nft_ctx *ctx,
 			      const struct nlattr * const tb[],
 			      struct nft_object *obj)
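As with last, limit and numgen, the quota's running counter moves behind a pointer. The interesting bit is nft_overquota, which charges skb->len with an atomic add-and-return so that concurrent packets cannot both slip under the ceiling. A userspace sketch of that check (C11 atomics standing in for the kernel's atomic64_t; names are mine):

/* Userspace sketch of the over-quota check: charge the packet length and
 * compare the running total against the configured quota. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct quota {
	uint64_t	quota;		/* configured ceiling, bytes */
	_Atomic uint64_t *consumed;	/* heap-allocated running total */
};

static bool overquota(struct quota *q, uint64_t pkt_len)
{
	/* add first, then compare: the packet that crosses the line counts */
	return atomic_fetch_add(q->consumed, pkt_len) + pkt_len >= q->quota;
}

int main(void)
{
	_Atomic uint64_t consumed = 0;
	struct quota q = { 3000, &consumed };

	for (int i = 0; i < 3; i++)
		printf("pkt %d over? %d\n", i, overquota(&q, 1500));	/* 0 1 1 */
	return 0;
}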
@@ -128,7 +138,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
 	 * that we see, don't go over the quota boundary in what we send to
 	 * userspace.
 	 */
-	consumed = atomic64_read(&priv->consumed);
+	consumed = atomic64_read(priv->consumed);
 	quota = atomic64_read(&priv->quota);
 	if (consumed >= quota) {
 		consumed_cap = quota;
@@ -145,7 +155,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
 		goto nla_put_failure;
 
 	if (reset) {
-		atomic64_sub(consumed, &priv->consumed);
+		atomic64_sub(consumed, priv->consumed);
 		clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
 	}
 	return 0;
@@ -162,11 +172,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
 	return nft_quota_do_dump(skb, priv, reset);
 }
 
+static void nft_quota_obj_destroy(const struct nft_ctx *ctx,
+				  struct nft_object *obj)
+{
+	struct nft_quota *priv = nft_obj_data(obj);
+
+	return nft_quota_do_destroy(ctx, priv);
+}
+
 static struct nft_object_type nft_quota_obj_type;
 static const struct nft_object_ops nft_quota_obj_ops = {
 	.type		= &nft_quota_obj_type,
 	.size		= sizeof(struct nft_quota),
 	.init		= nft_quota_obj_init,
+	.destroy	= nft_quota_obj_destroy,
 	.eval		= nft_quota_obj_eval,
 	.dump		= nft_quota_obj_dump,
 	.update		= nft_quota_obj_update,
@@ -205,12 +224,35 @@ static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return nft_quota_do_dump(skb, priv, false);
 }
 
+static void nft_quota_destroy(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr)
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+
+	return nft_quota_do_destroy(ctx, priv);
+}
+
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_quota *priv_dst = nft_expr_priv(dst);
+
+	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+	if (!priv_dst->consumed)
+		return -ENOMEM;
+
+	atomic64_set(priv_dst->consumed, 0);
+
+	return 0;
+}
+
 static struct nft_expr_type nft_quota_type;
 static const struct nft_expr_ops nft_quota_ops = {
 	.type		= &nft_quota_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_quota)),
 	.eval		= nft_quota_eval,
 	.init		= nft_quota_init,
+	.destroy	= nft_quota_destroy,
+	.clone		= nft_quota_clone,
 	.dump		= nft_quota_dump,
 };
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 6f4116e72958..52e0d026d30a 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1048,11 +1048,9 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
 					struct nft_pipapo_field *f, int offset,
 					const u8 *pkt, bool first, bool last)
 {
-	unsigned long *lt = f->lt, bsize = f->bsize;
+	unsigned long bsize = f->bsize;
 	int i, ret = -1, b;
 
-	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
-
 	if (first)
 		memset(map, 0xff, bsize * sizeof(*map));
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 0a913ce07425..267757b0392a 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
 		return XT_CONTINUE;
 
 	if (ct) {
-		atomic_inc(&ct->ct_general.use);
+		refcount_inc(&ct->ct_general.use);
 		nf_ct_set(skb, ct, IP_CT_NEW);
 	} else {
 		nf_ct_set(skb, ct, IP_CT_UNTRACKED);
@@ -201,7 +201,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 		goto err4;
 	}
 	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
-	nf_conntrack_get(&ct->ct_general);
 out:
 	info->ct = ct;
 	return 0;
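The xt_CT hunks belong to the tree-wide conversion of the conntrack use count from atomic_t to refcount_t. The point of the API switch is that refcount_t detects increment-from-zero and overflow instead of silently wrapping. A rough userspace model of those semantics (assertions standing in for the kernel's WARN-and-saturate behaviour; this is not the actual refcount_t implementation):

/* Illustrative model of why refcount_t is preferred over a bare atomic
 * counter: increments from zero (use-after-free) and overflows are
 * detected instead of wrapping. */
#include <assert.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdio.h>

struct refcount {
	atomic_uint use;
};

static void refcount_inc_checked(struct refcount *r)
{
	unsigned int old = atomic_fetch_add(&r->use, 1);

	/* the kernel WARNs and saturates; assert keeps the sketch short */
	assert(old != 0 && old != UINT_MAX);
}

static int refcount_dec_and_test_checked(struct refcount *r)
{
	unsigned int old = atomic_fetch_sub(&r->use, 1);

	assert(old != 0);
	return old == 1;	/* caller frees the object on true */
}

int main(void)
{
	struct refcount r = { 1 };	/* object born with one reference */

	refcount_inc_checked(&r);	/* e.g. xt_ct_target taking a ref */
	printf("free now? %d\n", refcount_dec_and_test_checked(&r));	/* 0 */
	printf("free now? %d\n", refcount_dec_and_test_checked(&r));	/* 1 */
	return 0;
}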