summary | refs | log | tree | commit | diff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 5
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 22
-rw-r--r-- net/netfilter/nf_nat_core.c | 24
-rw-r--r-- net/netfilter/nf_tables_api.c | 1269
-rw-r--r-- net/netfilter/nfnetlink.c | 10
-rw-r--r-- net/netfilter/nfnetlink_acct.c | 86
-rw-r--r-- net/netfilter/nft_ct.c | 96
-rw-r--r-- net/netfilter/nft_hash.c | 59
-rw-r--r-- net/netfilter/nft_lookup.c | 10
-rw-r--r-- net/netfilter/nft_meta.c | 103
-rw-r--r-- net/netfilter/nft_rbtree.c | 43
-rw-r--r-- net/netfilter/xt_bpf.c | 5
-rw-r--r-- net/netfilter/xt_nfacct.c | 5
-rw-r--r-- net/netfilter/xt_recent.c | 5
15 files changed, 1237 insertions, 507 deletions
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 117208321f16..ec8114fae50b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -271,10 +271,7 @@ ip_set_free(void *members)
{
pr_debug("%p: free with %s\n", members,
is_vmalloc_addr(members) ? "vfree" : "kfree");
- if (is_vmalloc_addr(members))
- vfree(members);
- else
- kfree(members);
+ kvfree(members);
}
EXPORT_SYMBOL_GPL(ip_set_free);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3d2d2c8108ca..e6836755c45d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -97,7 +97,7 @@ const char *ip_vs_proto_name(unsigned int proto)
return "ICMPv6";
#endif
default:
- sprintf(buf, "IP_%d", proto);
+ sprintf(buf, "IP_%u", proto);
return buf;
}
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e4cf8c..73ba1cc7a88d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_send_check(iph);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
rcu_read_unlock();
@@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
rcu_read_unlock();
@@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
MTU problem. */
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
rcu_read_unlock();
@@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
MTU problem. */
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
rcu_read_unlock();
@@ -883,10 +883,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
@@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->hop_limit = old_iph->hop_limit;
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
@@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_send_check(ip_hdr(skb));
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
rcu_read_unlock();
@@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
rcu_read_unlock();
@@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_nat_icmp(skb, pp, cp, 0);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
rcu_read_unlock();
@@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_vs_nat_icmp_v6(skb, pp, cp, 0);
/* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
+ skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
rcu_read_unlock();
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 52ca952b802c..09096a670c45 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -358,6 +358,19 @@ out:
rcu_read_unlock();
}
+/*
+ * Return the NAT extension of @ct, trying to attach one if it is missing.
+ *
+ * An extension is only added while the conntrack is still unconfirmed.
+ * For a confirmed conntrack that has no NAT extension the result is NULL;
+ * otherwise the result of nf_ct_ext_add() is passed straight through.
+ */
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
+{
+	struct nf_conn_nat *ext = nfct_nat(ct);
+
+	if (ext != NULL)
+		return ext;
+	if (nf_ct_is_confirmed(ct))
+		return NULL;
+
+	return nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
+
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
@@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct,
struct nf_conn_nat *nat;
/* nat helper or nfctnetlink also setup binding */
- nat = nfct_nat(ct);
- if (!nat) {
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
maniptype == NF_NAT_MANIP_DST);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3fd159db9f06..624e083125b9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
return ERR_PTR(-EAFNOSUPPORT);
}
+/*
+ * Fill @ctx from the netlink request that is being processed.
+ *
+ * The netns, sender portid, report flag and sequence number are copied out
+ * of @skb and @nlh, so the context holds those values itself rather than
+ * pointers to the request.
+ */
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct sk_buff *skb,
+			 const struct nlmsghdr *nlh,
+			 struct nft_af_info *afi,
+			 struct nft_table *table,
+			 struct nft_chain *chain,
+			 const struct nlattr * const *nla)
+{
+	/* Objects the request operates on. */
+	ctx->afi	= afi;
+	ctx->table	= table;
+	ctx->chain	= chain;
+	ctx->nla	= nla;
+
+	/* Values taken from the request's socket buffer. */
+	ctx->net	= sock_net(skb->sk);
+	ctx->portid	= NETLINK_CB(skb).portid;
+
+	/* Values taken from the netlink message header. */
+	ctx->seq	= nlh->nlmsg_seq;
+	ctx->report	= nlmsg_report(nlh);
+}
+
+/*
+ * Allocate a zeroed transaction of @msg_type with @size bytes of
+ * type-specific data following the common header, and copy @ctx into it.
+ *
+ * The list head is initialised here so that nft_trans_destroy(), which
+ * calls list_del() unconditionally, is safe on a transaction that was never
+ * queued on the commit list (as in the error path of nf_tables_updtable()).
+ * Without this the zeroed list pointers would be dereferenced.
+ *
+ * Returns the new transaction, or NULL if the allocation fails.
+ */
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+					 u32 size)
+{
+	struct nft_trans *trans;
+
+	trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+	if (trans == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&trans->list);
+	trans->msg_type = msg_type;
+	trans->ctx = *ctx;
+
+	return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ list_del(&trans->list);
+ kfree(trans);
+}
+
/*
* Tables
*/
@@ -197,20 +236,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_table_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- int event, int family)
+static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
- family, table);
+ err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -269,6 +303,9 @@ done:
return skb->len;
}
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -295,6 +332,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -343,7 +382,7 @@ err:
return err;
}
-static int nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
@@ -353,45 +392,63 @@ static int nf_tables_table_disable(const struct nft_af_info *afi,
nf_unregister_hooks(nft_base_chain(chain)->ops,
afi->nops);
}
-
- return 0;
}
-static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct nft_af_info *afi, struct nft_table *table)
+static int nf_tables_updtable(struct nft_ctx *ctx)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family, ret = 0;
+ struct nft_trans *trans;
+ u32 flags;
+ int ret = 0;
- if (nla[NFTA_TABLE_FLAGS]) {
- u32 flags;
+ if (!ctx->nla[NFTA_TABLE_FLAGS])
+ return 0;
- flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
+ if (flags & ~NFT_TABLE_F_DORMANT)
+ return -EINVAL;
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+ sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
- if ((flags & NFT_TABLE_F_DORMANT) &&
- !(table->flags & NFT_TABLE_F_DORMANT)) {
- ret = nf_tables_table_disable(afi, table);
- if (ret >= 0)
- table->flags |= NFT_TABLE_F_DORMANT;
- } else if (!(flags & NFT_TABLE_F_DORMANT) &&
- table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(afi, table);
- if (ret >= 0)
- table->flags &= ~NFT_TABLE_F_DORMANT;
+ if ((flags & NFT_TABLE_F_DORMANT) &&
+ !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_trans_table_enable(trans) = false;
+ } else if (!(flags & NFT_TABLE_F_DORMANT) &&
+ ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ if (ret >= 0) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ nft_trans_table_enable(trans) = true;
}
- if (ret < 0)
- goto err;
}
+ if (ret < 0)
+ goto err;
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+ nft_trans_table_update(trans) = true;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
err:
+ nft_trans_destroy(trans);
return ret;
}
+/*
+ * Queue a table transaction of @msg_type at the tail of the netns commit
+ * list.  For NFT_MSG_NEWTABLE the table in @ctx is additionally flagged
+ * NFT_TABLE_INACTIVE.
+ *
+ * Returns 0 on success or -ENOMEM if the transaction cannot be allocated.
+ */
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+	struct nft_trans *entry = nft_trans_alloc(ctx, msg_type,
+						  sizeof(struct nft_trans_table));
+
+	if (!entry)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWTABLE)
+		ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+	list_add_tail(&entry->list, &ctx->net->nft.commit_list);
+	return 0;
+}
+
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -403,6 +460,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, true);
if (IS_ERR(afi))
@@ -417,11 +476,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
}
if (table != NULL) {
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ return nf_tables_updtable(&ctx);
}
if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +507,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+ if (err < 0) {
+ kfree(table);
+ module_put(afi->owner);
+ return err;
+ }
list_add_tail(&table->list, &afi->tables);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
return 0;
}
@@ -457,7 +526,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family;
+ int family = nfmsg->nfgen_family, err;
+ struct nft_ctx ctx;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -466,17 +536,28 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
-
- if (!list_empty(&table->chains) || !list_empty(&table->sets))
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
+ if (table->use > 0)
return -EBUSY;
+ nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
list_del(&table->list);
- nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
- kfree(table);
- module_put(afi->owner);
return 0;
}
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+ BUG_ON(ctx->table->use > 0);
+
+ kfree(ctx->table);
+ module_put(ctx->afi->owner);
+}
+
int nft_register_chain_type(const struct nf_chain_type *ctype)
{
int err = 0;
@@ -541,7 +622,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
- [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
};
@@ -637,21 +718,13 @@ nla_put_failure:
return -1;
}
-static int nf_tables_chain_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
- int event, int family)
+static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
- bool report;
int err;
- report = nlh ? nlmsg_report(nlh) : false;
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -659,18 +732,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
- table, chain);
+ err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -740,10 +816,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -767,8 +847,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static int
-nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
struct nlattr *tb[NFTA_COUNTER_MAX+1];
struct nft_stats __percpu *newstats;
@@ -777,14 +856,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
if (err < 0)
- return err;
+ return ERR_PTR(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
newstats = alloc_percpu(struct nft_stats);
if (newstats == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -793,6 +872,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ return newstats;
+}
+
+static void nft_chain_stats_replace(struct nft_base_chain *chain,
+ struct nft_stats __percpu *newstats)
+{
if (chain->stats) {
struct nft_stats __percpu *oldstats =
nft_dereference(chain->stats);
@@ -802,17 +887,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
free_percpu(oldstats);
} else
rcu_assign_pointer(chain->stats, newstats);
+}
+
+/*
+ * Queue a chain transaction of @msg_type at the tail of the netns commit
+ * list.  For NFT_MSG_NEWCHAIN the chain in @ctx is additionally flagged
+ * NFT_CHAIN_INACTIVE.
+ *
+ * Returns 0 on success or -ENOMEM if the transaction cannot be allocated.
+ */
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+	struct nft_trans *entry = nft_trans_alloc(ctx, msg_type,
+						  sizeof(struct nft_trans_chain));
+
+	if (!entry)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWCHAIN)
+		ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+
+	list_add_tail(&entry->list, &ctx->net->nft.commit_list);
 	return 0;
 }
+/*
+ * Free @chain, which must no longer be in use (chain->use == 0).
+ *
+ * A regular chain is simply freed.  A base chain additionally drops the
+ * reference on its chain type's module and frees its per-cpu counters
+ * before the enclosing base chain object is freed.
+ */
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+	struct nft_base_chain *base;
+
+	BUG_ON(chain->use > 0);
+
+	if (!(chain->flags & NFT_BASE_CHAIN)) {
+		kfree(chain);
+		return;
+	}
+
+	base = nft_base_chain(chain);
+	module_put(base->type->owner);
+	free_percpu(base->stats);
+	kfree(base);
+}
+
static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr * uninitialized_var(name);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
@@ -822,8 +933,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
+ struct nft_stats __percpu *stats;
int err;
bool create;
+ struct nft_ctx ctx;
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
@@ -869,6 +982,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
}
if (chain != NULL) {
+ struct nft_stats *stats = NULL;
+ struct nft_trans *trans;
+
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -882,19 +1000,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(chain->flags & NFT_BASE_CHAIN))
return -EOPNOTSUPP;
- err = nf_tables_counters(nft_base_chain(chain),
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0)
- return err;
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
}
- if (nla[NFTA_CHAIN_POLICY])
- nft_base_chain(chain)->policy = policy;
+		nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+		trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
+					sizeof(struct nft_trans_chain));
+		if (trans == NULL) {
+			/* Do not leak the counters allocated above; stats is
+			 * NULL when no counters were supplied, which
+			 * free_percpu() accepts.
+			 */
+			free_percpu(stats);
+			return -ENOMEM;
+		}
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
- if (nla[NFTA_CHAIN_HANDLE] && name)
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
- goto notify;
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nla_strlcpy(nft_trans_chain_name(trans), name,
+ NFT_CHAIN_MAXNAMELEN);
+ }
+ list_add_tail(&trans->list, &net->nft.commit_list);
+ return 0;
}
if (table->use == UINT_MAX)
@@ -939,23 +1069,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return -ENOMEM;
if (nla[NFTA_CHAIN_COUNTERS]) {
- err = nf_tables_counters(basechain,
- nla[NFTA_CHAIN_COUNTERS]);
- if (err < 0) {
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
- return err;
+ return PTR_ERR(stats);
}
+ basechain->stats = stats;
} else {
- struct nft_stats __percpu *newstats;
-
- newstats = alloc_percpu(struct nft_stats);
- if (newstats == NULL) {
+			stats = alloc_percpu(struct nft_stats);
+			/* alloc_percpu() reports failure with NULL, not ERR_PTR() */
+			if (stats == NULL) {
 				module_put(type->owner);
 				kfree(basechain);
 				return -ENOMEM;
}
- rcu_assign_pointer(basechain->stats, newstats);
+ rcu_assign_pointer(basechain->stats, stats);
}
basechain->type = type;
@@ -992,31 +1120,27 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
chain->flags & NFT_BASE_CHAIN) {
err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
- if (err < 0) {
- module_put(basechain->type->owner);
- free_percpu(basechain->stats);
- kfree(basechain);
- return err;
- }
+ if (err < 0)
+ goto err1;
}
- list_add_tail(&chain->list, &table->chains);
- table->use++;
-notify:
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
- family);
- return 0;
-}
-static void nf_tables_chain_destroy(struct nft_chain *chain)
-{
- BUG_ON(chain->use > 0);
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ if (err < 0)
+ goto err2;
- if (chain->flags & NFT_BASE_CHAIN) {
- module_put(nft_base_chain(chain)->type->owner);
- free_percpu(nft_base_chain(chain)->stats);
- kfree(nft_base_chain(chain));
- } else
- kfree(chain);
+ table->use++;
+ list_add_tail(&chain->list, &table->chains);
+ return 0;
+err2:
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(chain)->ops,
+ afi->nops);
+ }
+err1:
+ nf_tables_chain_destroy(chain);
+ return err;
}
static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1024,11 +1148,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nft_ctx ctx;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1037,48 +1163,27 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
-
- if (!list_empty(&chain->rules) || chain->use > 0)
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
+ if (chain->use > 0)
return -EBUSY;
- list_del(&chain->list);
- table->use--;
-
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
-
- nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
- family);
-
- /* Make sure all rule references are gone before this is released */
- synchronize_rcu();
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
+ if (err < 0)
+ return err;
- nf_tables_chain_destroy(chain);
+ table->use--;
+ list_del(&chain->list);
return 0;
}
-static void nft_ctx_init(struct nft_ctx *ctx,
- const struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nft_af_info *afi,
- const struct nft_table *table,
- const struct nft_chain *chain,
- const struct nlattr * const *nla)
-{
- ctx->net = sock_net(skb->sk);
- ctx->skb = skb;
- ctx->nlh = nlh;
- ctx->afi = afi;
- ctx->table = table;
- ctx->chain = chain;
- ctx->nla = nla;
-}
-
/*
* Expressions
*/
@@ -1093,7 +1198,10 @@ static void nft_ctx_init(struct nft_ctx *ctx,
int nft_register_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&type->list, &nf_tables_expressions);
+ if (type->family == NFPROTO_UNSPEC)
+ list_add_tail(&type->list, &nf_tables_expressions);
+ else
+ list_add(&type->list, &nf_tables_expressions);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1361,22 +1469,15 @@ nla_put_failure:
return -1;
}
-static int nf_tables_rule_notify(const struct sk_buff *oskb,
- const struct nlmsghdr *nlh,
- const struct nft_table *table,
- const struct nft_chain *chain,
+static int nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule,
- int event, u32 flags, int family)
+ int event)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(oskb).portid;
- struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
- u32 seq = nlh->nlmsg_seq;
- bool report;
int err;
- report = nlmsg_report(nlh);
- if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
@@ -1384,18 +1485,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
- family, table, chain, rule);
+ err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
+ ctx->afi->family, ctx->table,
+ ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ if (err < 0) {
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ err);
+ }
return err;
}
@@ -1511,10 +1615,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
if (IS_ERR(chain))
return PTR_ERR(chain);
+ if (chain->flags & NFT_CHAIN_INACTIVE)
+ return -ENOENT;
rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule))
@@ -1554,37 +1662,36 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-#define NFT_RULE_MAXEXPRS 128
-
-static struct nft_expr_info *info;
-
-static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
{
- struct nft_rule_trans *rupd;
+ struct nft_trans *trans;
- rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
- if (rupd == NULL)
- return NULL;
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
- rupd->ctx = *ctx;
- rupd->rule = rule;
- list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+ nft_trans_rule(trans) = rule;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return rupd;
+ return trans;
}
+#define NFT_RULE_MAXEXPRS 128
+
+static struct nft_expr_info *info;
+
static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
- struct nft_rule_trans *repl = NULL;
+ struct nft_trans *trans = NULL;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
@@ -1682,8 +1789,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- repl = nf_tables_trans_add(&ctx, old_rule);
- if (repl == NULL) {
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE,
+ old_rule);
+ if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
@@ -1705,19 +1813,19 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
list_add_rcu(&rule->list, &chain->rules);
}
- if (nf_tables_trans_add(&ctx, rule) == NULL) {
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
goto err3;
}
+ chain->use++;
return 0;
err3:
list_del_rcu(&rule->list);
- if (repl) {
- list_del_rcu(&repl->rule->list);
- list_del(&repl->list);
- nft_rule_clear(net, repl->rule);
- kfree(repl);
+ if (trans) {
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nft_rule_clear(net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
}
err2:
nf_tables_rule_destroy(&ctx, rule);
@@ -1734,9 +1842,10 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nf_tables_trans_add(ctx, rule) == NULL)
+ if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
return -ENOMEM;
nft_rule_disactivate_next(ctx->net, rule);
+ ctx->chain->use--;
return 0;
}
return -ENOENT;
@@ -1760,9 +1869,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
- const struct nft_table *table;
+ struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
int family = nfmsg->nfgen_family, err = 0;
@@ -1775,6 +1884,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
if (nla[NFTA_RULE_CHAIN]) {
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -1807,88 +1918,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return err;
}
-static int nf_tables_commit(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- /* Bump generation counter, invalidate any dump in progress */
- net->nft.genctr++;
-
- /* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
-
- /* Make sure all packets have left the previous generation before
- * purging old rules.
- */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* This rule was inactive in the past and just became active.
- * Clear the next bit of the genmask since its meaning has
- * changed, now it is the future.
- */
- if (nft_rule_is_active(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- nf_tables_rule_notify(skb, rupd->ctx.nlh,
- rupd->ctx.table, rupd->ctx.chain,
- rupd->rule, NFT_MSG_NEWRULE, 0,
- rupd->ctx.afi->family);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is in the past, get rid of it */
- list_del_rcu(&rupd->rule->list);
- nf_tables_rule_notify(skb, rupd->ctx.nlh,
- rupd->ctx.table, rupd->ctx.chain,
- rupd->rule, NFT_MSG_DELRULE, 0,
- rupd->ctx.afi->family);
- }
-
- /* Make sure we don't see any packet traversing old rules */
- synchronize_rcu();
-
- /* Now we can safely release unused old rules */
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
-static int nf_tables_abort(struct sk_buff *skb)
-{
- struct net *net = sock_net(skb->sk);
- struct nft_rule_trans *rupd, *tmp;
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- if (!nft_rule_is_active_next(net, rupd->rule)) {
- nft_rule_clear(net, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- continue;
- }
-
- /* This rule is inactive, get rid of it */
- list_del_rcu(&rupd->rule->list);
- }
-
- /* Make sure we don't see any packet accessing aborted rules */
- synchronize_rcu();
-
- list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
- list_del(&rupd->list);
- kfree(rupd);
- }
-
- return 0;
-}
-
/*
* Sets
*/
@@ -1912,9 +1941,18 @@ void nft_unregister_set(struct nft_set_ops *ops)
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
-static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given; in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nlattr * const nla[],
+ const struct nft_set_desc *desc,
+ enum nft_set_policies policy)
{
- const struct nft_set_ops *ops;
+ const struct nft_set_ops *ops, *bops;
+ struct nft_set_estimate est, best;
u32 features;
#ifdef CONFIG_MODULES
@@ -1932,15 +1970,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
features &= NFT_SET_INTERVAL | NFT_SET_MAP;
}
- // FIXME: implement selection properly
+ bops = NULL;
+ best.size = ~0;
+ best.class = ~0;
+
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
continue;
+ if (!ops->estimate(desc, features, &est))
+ continue;
+
+ switch (policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ if (est.class < best.class)
+ break;
+ if (est.class == best.class && est.size < best.size)
+ break;
+ continue;
+ case NFT_SET_POL_MEMORY:
+ if (est.size < best.size)
+ break;
+ if (est.size == best.size && est.class < best.class)
+ break;
+ continue;
+ default:
+ break;
+ }
+
if (!try_module_get(ops->owner))
continue;
- return ops;
+ if (bops != NULL)
+ module_put(bops->owner);
+
+ bops = ops;
+ best = est;
}
+ if (bops != NULL)
+ return bops;
+
return ERR_PTR(-EOPNOTSUPP);
}
@@ -1953,6 +2021,13 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
[NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
[NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
+ [NFTA_SET_POLICY] = { .type = NLA_U32 },
+ [NFTA_SET_DESC] = { .type = NLA_NESTED },
+ [NFTA_SET_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+ [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1962,8 +2037,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
{
struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi = NULL;
- const struct nft_table *table = NULL;
+ struct nft_af_info *afi = NULL;
+ struct nft_table *table = NULL;
if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -1978,6 +2053,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
}
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -1999,13 +2076,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla)
+{
+ struct nft_trans *trans;
+ u32 id = ntohl(nla_get_be32(nla));
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ if (trans->msg_type == NFT_MSG_NEWSET &&
+ id == nft_trans_set_id(trans))
+ return nft_trans_set(trans);
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
{
const struct nft_set *i;
const char *p;
unsigned long *inuse;
- unsigned int n = 0;
+ unsigned int n = 0, min = 0;
p = strnchr(name, IFNAMSIZ, '%');
if (p != NULL) {
@@ -2015,23 +2106,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
-
+cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
if (!sscanf(i->name, name, &tmp))
continue;
- if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
+ if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
continue;
- set_bit(tmp, inuse);
+ set_bit(tmp - min, inuse);
}
n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+ if (n >= BITS_PER_BYTE * PAGE_SIZE) {
+ min += BITS_PER_BYTE * PAGE_SIZE;
+ memset(inuse, 0, PAGE_SIZE);
+ goto cont;
+ }
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, n);
+ snprintf(set->name, sizeof(set->name), name, min + n);
list_for_each_entry(i, &ctx->table->sets, list) {
if (!strcmp(set->name, i->name))
return -ENFILE;
@@ -2044,8 +2140,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
{
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- u32 seq = ctx->nlh->nlmsg_seq;
+ struct nlattr *desc;
+ u32 portid = ctx->portid;
+ u32 seq = ctx->seq;
event |= NFNL_SUBSYS_NFTABLES << 8;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2077,6 +2174,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ desc = nla_nest_start(skb, NFTA_SET_DESC);
+ if (desc == NULL)
+ goto nla_put_failure;
+ if (set->size &&
+ nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+ goto nla_put_failure;
+ nla_nest_end(skb, desc);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -2086,19 +2191,18 @@ nla_put_failure:
static int nf_tables_set_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
- int event)
+ int event, gfp_t gfp_flags)
{
struct sk_buff *skb;
- u32 portid = NETLINK_CB(ctx->skb).portid;
- bool report;
+ u32 portid = ctx->portid;
int err;
- report = nlmsg_report(ctx->nlh);
- if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ if (!ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return 0;
err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
if (skb == NULL)
goto err;
@@ -2108,8 +2212,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
- GFP_KERNEL);
+ err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
+ ctx->report, gfp_flags);
err:
if (err < 0)
nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2183,7 +2287,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
int cur_family = cb->args[3];
@@ -2260,6 +2364,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
}
+#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
+
static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2289,6 +2395,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
@@ -2305,13 +2413,50 @@ err:
return err;
}
+static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
+ struct nft_set_desc *desc,
+ const struct nlattr *nla)
+{
+ struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+ if (err < 0)
+ return err;
+
+ if (da[NFTA_SET_DESC_SIZE] != NULL)
+ desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+
+ return 0;
+}
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ set->flags |= NFT_SET_INACTIVE;
+ }
+ nft_trans_set(trans) = set;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
- const struct nft_af_info *afi;
+ struct nft_af_info *afi;
struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
@@ -2319,14 +2464,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, klen, dlen, dtype, flags;
+ u32 ktype, dtype, flags, policy;
+ struct nft_set_desc desc;
int err;
if (nla[NFTA_SET_TABLE] == NULL ||
nla[NFTA_SET_NAME] == NULL ||
- nla[NFTA_SET_KEY_LEN] == NULL)
+ nla[NFTA_SET_KEY_LEN] == NULL ||
+ nla[NFTA_SET_ID] == NULL)
return -EINVAL;
+ memset(&desc, 0, sizeof(desc));
+
ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2334,8 +2483,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
return -EINVAL;
}
- klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+ desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+ if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
flags = 0;
@@ -2347,7 +2496,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
dtype = 0;
- dlen = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
@@ -2360,15 +2508,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
- dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (dlen == 0 ||
- dlen > FIELD_SIZEOF(struct nft_data, data))
+ desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+ if (desc.dlen == 0 ||
+ desc.dlen > FIELD_SIZEOF(struct nft_data, data))
return -EINVAL;
} else
- dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_data);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ policy = NFT_SET_POL_PERFORMANCE;
+ if (nla[NFTA_SET_POLICY] != NULL)
+ policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+ if (nla[NFTA_SET_DESC] != NULL) {
+ err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+ if (err < 0)
+ return err;
+ }
+
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2399,7 +2557,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(nla);
+ ops = nft_select_set_ops(nla, &desc, policy);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -2420,17 +2578,22 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&set->bindings);
set->ops = ops;
set->ktype = ktype;
- set->klen = klen;
+ set->klen = desc.klen;
set->dtype = dtype;
- set->dlen = dlen;
+ set->dlen = desc.dlen;
set->flags = flags;
+ set->size = desc.size;
+
+ err = ops->init(set, &desc, nla);
+ if (err < 0)
+ goto err2;
- err = ops->init(set, nla);
+ err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
goto err2;
list_add_tail(&set->list, &table->sets);
- nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+ table->use++;
return 0;
err2:
@@ -2440,16 +2603,20 @@ err1:
return err;
}
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
{
- list_del(&set->list);
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
set->ops->destroy(set);
module_put(set->ops->owner);
kfree(set);
}
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ list_del(&set->list);
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+ nft_set_destroy(set);
+}
+
static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2471,10 +2638,17 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (!list_empty(&set->bindings))
return -EBUSY;
- nf_tables_set_destroy(&ctx, set);
+ err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ list_del(&set->list);
+ ctx.table->use--;
return 0;
}
@@ -2534,7 +2708,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
{
list_del(&binding->list);
- if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+ !(set->flags & NFT_SET_INACTIVE))
nf_tables_set_destroy(ctx, set);
}
@@ -2552,16 +2727,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
[NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
+ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ bool trans)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nft_af_info *afi;
- const struct nft_table *table;
+ struct nft_af_info *afi;
+ struct nft_table *table;
struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2571,6 +2748,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
+ if (!trans && (table->flags & NFT_TABLE_INACTIVE))
+ return -ENOENT;
nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
return 0;
@@ -2644,13 +2823,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+ err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
+ false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
event = NFT_MSG_NEWSETELEM;
event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2707,13 +2889,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_INACTIVE)
+ return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2724,7 +2908,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nf_tables_fill_setelem_info(struct sk_buff *skb,
+ const struct nft_ctx *ctx, u32 seq,
+ u32 portid, int event, u16 flags,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+ int err;
+
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_SET_NAME, set->name))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ err = nf_tables_fill_setelem(skb, set, elem);
+ if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ int event, u16 flags)
+{
+ struct net *net = ctx->net;
+ u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ int err;
+
+ if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
+ set, elem);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
+ GFP_KERNEL);
+err:
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+ int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_elem_set(trans) = set;
+ return trans;
+}
+
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -2732,8 +3007,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_elem elem;
struct nft_set_binding *binding;
enum nft_registers dreg;
+ struct nft_trans *trans;
int err;
+ if (set->size && set->nelems == set->size)
+ return -ENFILE;
+
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -2786,7 +3065,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_ctx bind_ctx = {
.afi = ctx->afi,
.table = ctx->table,
- .chain = binding->chain,
+ .chain = (struct nft_chain *)binding->chain,
};
err = nft_validate_data_load(&bind_ctx, dreg,
@@ -2796,12 +3075,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+ if (trans == NULL)
+ goto err3;
+
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err3;
+ goto err4;
+ nft_trans_elem(trans) = elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err4:
+ kfree(trans);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_uninit(&elem.data, d2.type);
@@ -2815,35 +3102,46 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+ set = nf_tables_set_lookup_byid(net,
+ nla[NFTA_SET_ELEM_LIST_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
+
+ set->nelems++;
}
- return 0;
+ return err;
}
-static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc desc;
struct nft_set_elem elem;
+ struct nft_trans *trans;
int err;
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -2867,7 +3165,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err2;
- set->ops->remove(set, &elem);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+ if (trans == NULL)
+ goto err2;
+
+ nft_trans_elem(trans) = elem;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
nft_data_uninit(&elem.key, NFT_DATA_VALUE);
if (set->flags & NFT_SET_MAP)
@@ -2886,9 +3189,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err;
+ int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -2901,14 +3204,16 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
if (err < 0)
- return err;
+ break;
+
+ set->nelems--;
}
- return 0;
+ return err;
}
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
- .call = nf_tables_newtable,
+ .call_batch = nf_tables_newtable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
@@ -2918,12 +3223,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_table_policy,
},
[NFT_MSG_DELTABLE] = {
- .call = nf_tables_deltable,
+ .call_batch = nf_tables_deltable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_NEWCHAIN] = {
- .call = nf_tables_newchain,
+ .call_batch = nf_tables_newchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2933,7 +3238,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_chain_policy,
},
[NFT_MSG_DELCHAIN] = {
- .call = nf_tables_delchain,
+ .call_batch = nf_tables_delchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -2953,7 +3258,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_NEWSET] = {
- .call = nf_tables_newset,
+ .call_batch = nf_tables_newset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
@@ -2963,12 +3268,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_policy,
},
[NFT_MSG_DELSET] = {
- .call = nf_tables_delset,
+ .call_batch = nf_tables_delset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_NEWSETELEM] = {
- .call = nf_tables_newsetelem,
+ .call_batch = nf_tables_newsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
@@ -2978,12 +3283,282 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_DELSETELEM] = {
- .call = nf_tables_delsetelem,
+ .call_batch = nf_tables_delsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
};
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_trans_chain_name(trans)[0])
+ strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+
+ if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+ return;
+
+ basechain = nft_base_chain(trans->ctx.chain);
+ nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+
+ switch (nft_trans_chain_policy(trans)) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ basechain->policy = nft_trans_chain_policy(trans);
+ break;
+ }
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accessing
+ * removed rules.
+ */
+static void nf_tables_commit_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_DELRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_DELSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static int nf_tables_commit(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ /* Bump generation counter, invalidate any dump in progress */
+ net->nft.genctr++;
+
+ /* A new generation has just started */
+ net->nft.gencursor = gencursor_next(net);
+
+ /* Make sure all packets have left the previous generation before
+ * purging old rules.
+ */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (!nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ } else {
+ trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+ }
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELTABLE:
+ nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans))
+ nft_chain_commit_update(trans);
+ else
+ trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELCHAIN:
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ break;
+ case NFT_MSG_NEWRULE:
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_NEWRULE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELRULE:
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ nf_tables_rule_notify(&trans->ctx,
+ nft_trans_rule(trans),
+ NFT_MSG_DELRULE);
+ break;
+ case NFT_MSG_NEWSET:
+ nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_NEWSET, GFP_KERNEL);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSET:
+ nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+ NFT_MSG_DELSET, GFP_KERNEL);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_NEWSETELEM, 0);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nf_tables_setelem_notify(&trans->ctx,
+ nft_trans_elem_set(trans),
+ &nft_trans_elem(trans),
+ NFT_MSG_DELSETELEM, 0);
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
+ }
+
+ return 0;
+}
+
+/* Schedule objects for release via rcu to make sure no packets are accessing
+ * aborted rules.
+ */
+static void nf_tables_abort_release_rcu(struct rcu_head *rt)
+{
+ struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ nf_tables_table_destroy(&trans->ctx);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ nf_tables_chain_destroy(trans->ctx.chain);
+ break;
+ case NFT_MSG_NEWRULE:
+ nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+ break;
+ case NFT_MSG_NEWSET:
+ nft_set_destroy(nft_trans_set(trans));
+ break;
+ }
+ kfree(trans);
+}
+
+static int nf_tables_abort(struct sk_buff *skb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nft_trans *trans, *next;
+ struct nft_set *set;
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWTABLE:
+ if (nft_trans_table_update(trans)) {
+ if (nft_trans_table_enable(trans)) {
+ nf_tables_table_disable(trans->ctx.afi,
+ trans->ctx.table);
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ }
+ nft_trans_destroy(trans);
+ } else {
+ list_del(&trans->ctx.table->list);
+ }
+ break;
+ case NFT_MSG_DELTABLE:
+ list_add_tail(&trans->ctx.table->list,
+ &trans->ctx.afi->tables);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain_update(trans)) {
+ if (nft_trans_chain_stats(trans))
+ free_percpu(nft_trans_chain_stats(trans));
+
+ nft_trans_destroy(trans);
+ } else {
+ trans->ctx.table->use--;
+ list_del(&trans->ctx.chain->list);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+ trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+ nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+ trans->ctx.afi->nops);
+ }
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+ trans->ctx.table->use++;
+ list_add_tail(&trans->ctx.chain->list,
+ &trans->ctx.table->chains);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWRULE:
+ trans->ctx.chain->use--;
+ list_del_rcu(&nft_trans_rule(trans)->list);
+ break;
+ case NFT_MSG_DELRULE:
+ trans->ctx.chain->use++;
+ nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSET:
+ trans->ctx.table->use--;
+ list_del(&nft_trans_set(trans)->list);
+ break;
+ case NFT_MSG_DELSET:
+ trans->ctx.table->use++;
+ list_add_tail(&nft_trans_set(trans)->list,
+ &trans->ctx.table->sets);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_NEWSETELEM:
+ nft_trans_elem_set(trans)->nelems--;
+ set = nft_trans_elem_set(trans);
+ set->ops->get(set, &nft_trans_elem(trans));
+ set->ops->remove(set, &nft_trans_elem(trans));
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELSETELEM:
+ nft_trans_elem_set(trans)->nelems++;
+ nft_trans_destroy(trans);
+ break;
+ }
+ }
+
+ list_for_each_entry_safe_reverse(trans, next,
+ &net->nft.commit_list, list) {
+ list_del(&trans->list);
+ trans->ctx.nla = NULL;
+ call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
+ }
+
+ return 0;
+}
+
static const struct nfnetlink_subsystem nf_tables_subsys = {
.name = "nf_tables",
.subsys_id = NFNL_SUBSYS_NFTABLES,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 23ef77c60fff..c138b8fbe280 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -399,19 +399,17 @@ static void nfnetlink_rcv(struct sk_buff *skb)
}
#ifdef CONFIG_MODULES
-static void nfnetlink_bind(int group)
+static int nfnetlink_bind(int group)
{
const struct nfnetlink_subsystem *ss;
int type = nfnl_group2type[group];
rcu_read_lock();
ss = nfnetlink_get_subsys(type);
- if (!ss) {
- rcu_read_unlock();
- request_module("nfnetlink-subsys-%d", type);
- return;
- }
rcu_read_unlock();
+ if (!ss)
+ request_module("nfnetlink-subsys-%d", type);
+ return 0;
}
#endif
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d466a662..2baa125c2e8d 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list);
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
+ unsigned long flags;
struct list_head head;
atomic_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
+ char data[0];
};
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+
static int
nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
char *acct_name;
+ unsigned int size = 0;
+ u32 flags = 0;
if (!tb[NFACCT_NAME])
return -EINVAL;
@@ -68,15 +74,38 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
/* reset counters if you request a replacement. */
atomic64_set(&matching->pkts, 0);
atomic64_set(&matching->bytes, 0);
+ smp_mb__before_atomic();
+ /* reset overquota flag if quota is enabled. */
+ if ((matching->flags & NFACCT_F_QUOTA))
+ clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
return 0;
}
return -EBUSY;
}
- nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+ if (tb[NFACCT_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+ if (flags & ~NFACCT_F_QUOTA)
+ return -EOPNOTSUPP;
+ if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+ return -EINVAL;
+ if (flags & NFACCT_F_OVERQUOTA)
+ return -EINVAL;
+
+ size += sizeof(u64);
+ }
+
+ nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
if (nfacct == NULL)
return -ENOMEM;
+ if (flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)nfacct->data;
+
+ *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+ nfacct->flags = flags;
+ }
+
strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
if (tb[NFACCT_BYTES]) {
@@ -117,6 +146,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
pkts = atomic64_xchg(&acct->pkts, 0);
bytes = atomic64_xchg(&acct->bytes, 0);
+ smp_mb__before_atomic();
+ if (acct->flags & NFACCT_F_QUOTA)
+ clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
} else {
pkts = atomic64_read(&acct->pkts);
bytes = atomic64_read(&acct->bytes);
@@ -125,7 +157,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
goto nla_put_failure;
+ if (acct->flags & NFACCT_F_QUOTA) {
+ u64 *quota = (u64 *)acct->data;
+ if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+ nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
+ goto nla_put_failure;
+ }
nlmsg_end(skb, nlh);
return skb->len;
@@ -270,6 +308,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
[NFACCT_BYTES] = { .type = NLA_U64 },
[NFACCT_PKTS] = { .type = NLA_U64 },
+ [NFACCT_FLAGS] = { .type = NLA_U32 },
+ [NFACCT_QUOTA] = { .type = NLA_U64 },
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -336,6 +376,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
}
EXPORT_SYMBOL_GPL(nfnl_acct_update);
+static void nfnl_overquota_report(struct nf_acct *nfacct)
+{
+ int ret;
+ struct sk_buff *skb;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+ ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+ nfacct);
+ if (ret <= 0) {
+ kfree_skb(skb);
+ return;
+ }
+ netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+ GFP_ATOMIC);
+}
+
+int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+ u64 now;
+ u64 *quota;
+ int ret = NFACCT_UNDERQUOTA;
+
+ /* no place here if we don't have a quota */
+ if (!(nfacct->flags & NFACCT_F_QUOTA))
+ return NFACCT_NO_QUOTA;
+
+ quota = (u64 *)nfacct->data;
+ now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
+ atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
+
+ ret = now > *quota;
+
+ if (now >= *quota &&
+ !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+ nfnl_overquota_report(nfacct);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
static int __init nfnl_acct_init(void)
{
int ret;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0d41e69341..cc5603016242 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -215,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family)
nf_ct_l3proto_module_put(family);
}
-static int nft_ct_init_validate_get(const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ int err;
- if (tb[NFTA_CT_DIRECTION] != NULL) {
- priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
- switch (priv->dir) {
- case IP_CT_DIR_ORIGINAL:
- case IP_CT_DIR_REPLY:
- break;
- default:
- return -EINVAL;
- }
- }
-
+ priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
case NFT_CT_STATE:
case NFT_CT_DIRECTION:
@@ -262,55 +254,55 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
return -EOPNOTSUPP;
}
- return 0;
-}
-
-static int nft_ct_init_validate_set(uint32_t key)
-{
- switch (key) {
- case NFT_CT_MARK:
- break;
- default:
- return -EOPNOTSUPP;
+ if (tb[NFTA_CT_DIRECTION] != NULL) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ err = nft_ct_l3proto_try_module_get(ctx->afi->family);
+ if (err < 0)
+ return err;
+
return 0;
}
-static int nft_ct_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_ct_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
-
- if (tb[NFTA_CT_DREG]) {
- err = nft_ct_init_validate_get(expr, tb);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
- if (err < 0)
- return err;
- } else {
- err = nft_ct_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ break;
+#endif
+ default:
+ return -EOPNOTSUPP;
}
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
+ err = nft_validate_input_register(priv->sreg);
+ if (err < 0)
+ return err;
+
err = nft_ct_l3proto_try_module_get(ctx->afi->family);
if (err < 0)
return err;
@@ -370,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_get_eval,
- .init = nft_ct_init,
+ .init = nft_ct_get_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_get_dump,
};
@@ -379,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_set_eval,
- .init = nft_ct_init,
+ .init = nft_ct_set_init,
.destroy = nft_ct_destroy,
.dump = nft_ct_set_dump,
};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3b1ad876d6b0..4080ed6a072b 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
#include <linux/vmalloc.h>
@@ -19,7 +20,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#define NFT_HASH_MIN_SIZE 4
+#define NFT_HASH_MIN_SIZE 4UL
struct nft_hash {
struct nft_hash_table __rcu *tbl;
@@ -27,7 +28,6 @@ struct nft_hash {
struct nft_hash_table {
unsigned int size;
- unsigned int elements;
struct nft_hash_elem __rcu *buckets[];
};
@@ -76,10 +76,12 @@ static bool nft_hash_lookup(const struct nft_set *set,
static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
- if (is_vmalloc_addr(tbl))
- vfree(tbl);
- else
- kfree(tbl);
+ kvfree(tbl);
+}
+
+static unsigned int nft_hash_tbl_size(unsigned int nelem)
+{
+ return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
}
static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
@@ -161,7 +163,6 @@ static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
break;
}
}
- ntbl->elements = tbl->elements;
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
@@ -201,7 +202,6 @@ static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
;
RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
}
- ntbl->elements = tbl->elements;
/* Publish new table */
rcu_assign_pointer(priv->tbl, ntbl);
@@ -237,10 +237,9 @@ static int nft_hash_insert(const struct nft_set *set,
h = nft_hash_data(&he->key, tbl->size, set->klen);
RCU_INIT_POINTER(he->next, tbl->buckets[h]);
rcu_assign_pointer(tbl->buckets[h], he);
- tbl->elements++;
/* Expand table when exceeding 75% load */
- if (tbl->elements > tbl->size / 4 * 3)
+ if (set->nelems + 1 > tbl->size / 4 * 3)
nft_hash_tbl_expand(set, priv);
return 0;
@@ -268,10 +267,9 @@ static void nft_hash_remove(const struct nft_set *set,
RCU_INIT_POINTER(*pprev, he->next);
synchronize_rcu();
kfree(he);
- tbl->elements--;
/* Shrink table beneath 30% load */
- if (tbl->elements < tbl->size * 3 / 10 &&
+ if (set->nelems - 1 < tbl->size * 3 / 10 &&
tbl->size > NFT_HASH_MIN_SIZE)
nft_hash_tbl_shrink(set, priv);
}
@@ -335,17 +333,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
}
static int nft_hash_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_table *tbl;
+ unsigned int size;
if (unlikely(!nft_hash_rnd_initted)) {
get_random_bytes(&nft_hash_rnd, 4);
nft_hash_rnd_initted = true;
}
- tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+ size = NFT_HASH_MIN_SIZE;
+ if (desc->size)
+ size = nft_hash_tbl_size(desc->size);
+
+ tbl = nft_hash_tbl_alloc(size);
if (tbl == NULL)
return -ENOMEM;
RCU_INIT_POINTER(priv->tbl, tbl);
@@ -369,8 +373,37 @@ static void nft_hash_destroy(const struct nft_set *set)
kfree(tbl);
}
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int esize;
+
+ esize = sizeof(struct nft_hash_elem);
+ if (features & NFT_SET_MAP)
+ esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
+
+ if (desc->size) {
+ est->size = sizeof(struct nft_hash) +
+ nft_hash_tbl_size(desc->size) *
+ sizeof(struct nft_hash_elem *) +
+ desc->size * esize;
+ } else {
+ /* Resizing happens when the load drops below 30% or goes
+ * above 75%. The average of 52.5% load (approximated by 50%)
+ * is used for the size estimation of the hash buckets,
+ * meaning we calculate two buckets per element.
+ */
+ est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+ }
+
+ est->class = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.get = nft_hash_get,
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea8aa23..6404a726d17b 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return -EINVAL;
set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
- if (IS_ERR(set))
- return PTR_ERR(set);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_LOOKUP_SET_ID]) {
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_LOOKUP_SET_ID]);
+ }
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
err = nft_validate_input_register(priv->sreg);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 425cf39af890..852b178c6ae7 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,18 +18,11 @@
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
-struct nft_meta {
- enum nft_meta_keys key:8;
- union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
- };
-};
-
-static void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
err:
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-static void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
@@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
WARN_ON(1);
}
}
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
+EXPORT_SYMBOL_GPL(nft_meta_policy);
-static int nft_meta_init_validate_set(uint32_t key)
+int nft_meta_get_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- switch (key) {
- case NFT_META_MARK:
- case NFT_META_PRIORITY:
- case NFT_META_NFTRACE:
- return 0;
- default:
- return -EOPNOTSUPP;
- }
-}
+ struct nft_meta *priv = nft_expr_priv(expr);
+ int err;
-static int nft_meta_init_validate_get(uint32_t key)
-{
- switch (key) {
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
case NFT_META_LEN:
case NFT_META_PROTOCOL:
case NFT_META_NFPROTO:
@@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key)
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
- return 0;
+ break;
default:
return -EOPNOTSUPP;
}
+ priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+ err = nft_validate_output_register(priv->dreg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ if (err < 0)
+ return err;
+
+ return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
-static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
-
- if (tb[NFTA_META_DREG]) {
- err = nft_meta_init_validate_get(priv->key);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- return nft_validate_data_load(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE);
+ switch (priv->key) {
+ case NFT_META_MARK:
+ case NFT_META_PRIORITY:
+ case NFT_META_NFTRACE:
+ break;
+ default:
+ return -EOPNOTSUPP;
}
- err = nft_meta_init_validate_set(priv->key);
- if (err < 0)
- return err;
-
priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
err = nft_validate_input_register(priv->sreg);
if (err < 0)
@@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
-static int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
-static int nft_meta_set_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_get_eval,
- .init = nft_meta_init,
+ .init = nft_meta_get_init,
.dump = nft_meta_get_dump,
};
@@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
- .init = nft_meta_init,
+ .init = nft_meta_set_init,
.dump = nft_meta_set_dump,
};
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d13506..e1836ff88199 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -18,6 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+static DEFINE_SPINLOCK(nft_rbtree_lock);
+
struct nft_rbtree {
struct rb_root root;
};
@@ -38,6 +40,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
const struct rb_node *parent = priv->root.rb_node;
int d;
+ spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -53,6 +56,8 @@ found:
goto out;
if (set->flags & NFT_SET_MAP)
nft_data_copy(data, rbe->data);
+
+ spin_unlock_bh(&nft_rbtree_lock);
return true;
}
}
@@ -62,6 +67,7 @@ found:
goto found;
}
out:
+ spin_unlock_bh(&nft_rbtree_lock);
return false;
}
@@ -124,9 +130,12 @@ static int nft_rbtree_insert(const struct nft_set *set,
!(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(rbe->data, &elem->data);
+ spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
if (err < 0)
kfree(rbe);
+
+ spin_unlock_bh(&nft_rbtree_lock);
return err;
}
@@ -136,7 +145,9 @@ static void nft_rbtree_remove(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->cookie;
+ spin_lock_bh(&nft_rbtree_lock);
rb_erase(&rbe->node, &priv->root);
+ spin_unlock_bh(&nft_rbtree_lock);
kfree(rbe);
}
@@ -147,6 +158,7 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
struct nft_rbtree_elem *rbe;
int d;
+ spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -161,9 +173,11 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
!(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem->data, rbe->data);
elem->flags = rbe->flags;
+ spin_unlock_bh(&nft_rbtree_lock);
return 0;
}
}
+ spin_unlock_bh(&nft_rbtree_lock);
return -ENOENT;
}
@@ -176,6 +190,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set_elem elem;
struct rb_node *node;
+ spin_lock_bh(&nft_rbtree_lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
if (iter->count < iter->skip)
goto cont;
@@ -188,11 +203,14 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
elem.flags = rbe->flags;
iter->err = iter->fn(ctx, set, iter, &elem);
- if (iter->err < 0)
+ if (iter->err < 0) {
+ spin_unlock_bh(&nft_rbtree_lock);
return;
+ }
cont:
iter->count++;
}
+ spin_unlock_bh(&nft_rbtree_lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -201,6 +219,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
}
static int nft_rbtree_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
const struct nlattr * const nla[])
{
struct nft_rbtree *priv = nft_set_priv(set);
@@ -215,15 +234,37 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
+ spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
+ spin_unlock_bh(&nft_rbtree_lock);
+}
+
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ unsigned int nsize;
+
+ nsize = sizeof(struct nft_rbtree_elem);
+ if (features & NFT_SET_MAP)
+ nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
+
+ if (desc->size)
+ est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+ else
+ est->size = nsize;
+
+ est->class = NFT_SET_CLASS_O_LOG_N;
+
+ return true;
}
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 12d4da8e6c77..bbffdbdaf603 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf");
static int bpf_mt_check(const struct xt_mtchk_param *par)
{
struct xt_bpf_info *info = par->matchinfo;
- struct sock_fprog program;
+ struct sock_fprog_kern program;
program.len = info->bpf_program_num_elem;
- program.filter = (struct sock_filter __user *) info->bpf_program;
+ program.filter = info->bpf_program;
+
if (sk_unattached_filter_create(&info->filter, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef21f19..8c646ed9c921 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct");
static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
+ int overquota;
const struct xt_nfacct_match_info *info = par->targinfo;
nfnl_acct_update(skb, info->nfacct);
- return true;
+ overquota = nfnl_acct_overquota(skb, info->nfacct);
+
+ return overquota == NFACCT_UNDERQUOTA ? false : true;
}
static int
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1e657cf715c4..a9faae89f955 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -313,10 +313,7 @@ out:
static void recent_table_free(void *addr)
{
- if (is_vmalloc_addr(addr))
- vfree(addr);
- else
- kfree(addr);
+ kvfree(addr);
}
static int recent_mt_check(const struct xt_mtchk_param *par,