diff options
57 files changed, 1747 insertions, 2419 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9077b3ebea08..bf384b3eedb8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -289,9 +289,9 @@ bool xt_find_jump_offset(const unsigned int *offsets, int xt_check_proc_name(const char *name, unsigned int size); -int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, +int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); -int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, +int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto); int xt_match_to_user(const struct xt_entry_match *m, diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b5aac5ae5129..5ee7b30b4917 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -190,23 +190,23 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies, int do_acct); + u32 extra_jiffies, bool do_acct); /* Refresh conntrack for this many jiffies and do accounting */ static inline void nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + u32 extra_jiffies) { - __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); + __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true); } /* Refresh conntrack for this many jiffies */ static inline void nf_ct_refresh(struct nf_conn *ct, const struct sk_buff *skb, - unsigned long extra_jiffies) + u32 extra_jiffies) { - __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); + __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false); } /* kill conntrack and do accounting */ diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 8aff77cafb8b..cf332c4e0b32 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -31,8 +31,7 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { union nf_conntrack_nat_help help; -#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ - IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) +#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) int masq_index; #endif }; @@ -61,8 +60,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, struct nf_conn_nat *nat, const struct net_device *out) { -#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ - IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) +#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE) return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && nat->masq_index != out->ifindex; @@ -75,4 +73,43 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, const struct nf_hook_ops *nat_ops, unsigned int ops_count); void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, unsigned int ops_count); + +unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + unsigned int hooknum, struct sk_buff *skb); + +unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, + enum nf_nat_manip_type mtype, + enum ip_conntrack_dir dir); +void nf_nat_csum_recalc(struct sk_buff *skb, + u8 nfproto, u8 proto, void *data, __sum16 *check, + int datalen, int oldlen); + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum); + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, unsigned int hdrlen); + +int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); + +unsigned int +nf_nat_inet_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + +int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family); + +static inline int nf_nat_initialized(struct nf_conn *ct, + enum nf_nat_manip_type manip) +{ + if (manip == NF_NAT_MANIP_SRC) + return ct->status & IPS_SRC_NAT_DONE; + else + return ct->status & IPS_DST_NAT_DONE; +} #endif diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h deleted file mode 100644 index dc7cd0440229..000000000000 --- a/include/net/netfilter/nf_nat_core.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_CORE_H -#define _NF_NAT_CORE_H -#include <linux/list.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> - -/* This header used to share core functionality between the standalone - NAT module, and the compatibility layer's use of NAT for masquerading. */ - -unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - unsigned int hooknum, struct sk_buff *skb); - -unsigned int -nf_nat_inet_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state); - -int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family); - -static inline int nf_nat_initialized(struct nf_conn *ct, - enum nf_nat_manip_type manip) -{ - if (manip == NF_NAT_MANIP_SRC) - return ct->status & IPS_SRC_NAT_DONE; - else - return ct->status & IPS_DST_NAT_DONE; -} - -#endif /* _NF_NAT_CORE_H */ diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h deleted file mode 100644 index d774ca0c4c5e..000000000000 --- a/include/net/netfilter/nf_nat_l3proto.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NF_NAT_L3PROTO_H -#define _NF_NAT_L3PROTO_H - -struct nf_nat_l3proto { - u8 l3proto; - - bool (*manip_pkt)(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype); - - void (*csum_update)(struct sk_buff *skb, unsigned int iphdroff, - __sum16 *check, - const struct nf_conntrack_tuple *t, - enum nf_nat_manip_type maniptype); - - void (*csum_recalc)(struct sk_buff *skb, u8 proto, - void *data, __sum16 *check, - int datalen, int oldlen); - - void (*decode_session)(struct sk_buff *skb, - const struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned long statusbit, - struct flowi *fl); - - int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range2 *range); -}; - -int nf_nat_l3proto_register(const struct nf_nat_l3proto *); -void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *); -const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 l3proto); - -int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum); - -int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, unsigned int hdrlen); - -int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); -void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); - -int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); -void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); - -#endif /* _NF_NAT_L3PROTO_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h deleted file mode 100644 index 95a4655bd1ad..000000000000 --- a/include/net/netfilter/nf_nat_l4proto.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Header for use in defining a given protocol. */ -#ifndef _NF_NAT_L4PROTO_H -#define _NF_NAT_L4PROTO_H -#include <net/netfilter/nf_nat.h> -#include <linux/netfilter/nfnetlink_conntrack.h> - -struct nf_nat_l3proto; - -/* Translate a packet to the target according to manip type. Return on success. */ -bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype); -#endif /*_NF_NAT_L4PROTO_H*/ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a66fcd316734..c331e96a713b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -690,10 +690,12 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, gcb->elems[gcb->head.cnt++] = elem; } +struct nft_expr_ops; /** * struct nft_expr_type - nf_tables expression type * * @select_ops: function to select nft_expr_ops + * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present * @list: used internally * @name: Identifier @@ -706,6 +708,7 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, struct nft_expr_type { const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, const struct nlattr * const tb[]); + void (*release_ops)(const struct nft_expr_ops *ops); const struct nft_expr_ops *ops; struct list_head list; const char *name; diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h deleted file mode 100644 index e51ab3815797..000000000000 --- a/include/net/netfilter/nft_masq.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_MASQ_H_ -#define _NFT_MASQ_H_ - -struct nft_masq { - u32 flags; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; -}; - -extern const struct nla_policy nft_masq_policy[]; - -int nft_masq_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]); - -int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr); - -int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data); - -#endif /* _NFT_MASQ_H_ */ diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h deleted file mode 100644 index 4a970737c03c..000000000000 --- a/include/net/netfilter/nft_redir.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_REDIR_H_ -#define _NFT_REDIR_H_ - -struct nft_redir { - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; - u16 flags; -}; - -extern const struct nla_policy nft_redir_policy[]; - -int nft_redir_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]); - -int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr); - -int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data); - -#endif /* _NFT_REDIR_H_ */ diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 40d058378b52..9d34de68571b 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -831,7 +831,8 @@ static unsigned int ip_sabotage_in(void *priv, struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (nf_bridge && !nf_bridge->in_prerouting && - !netif_is_l3_master(skb->dev)) { + !netif_is_l3_master(skb->dev) && + !netif_is_l3_slave(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 6693e209efe8..eb15891f8b9f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -31,10 +31,6 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ - "report to author: "format, ## args) -/* #define BUGPRINT(format, args...) */ - /* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to @@ -385,7 +381,7 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, par->match = match; par->matchinfo = m->data; ret = xt_check_match(par, m->match_size, - e->ethproto, e->invflags & EBT_IPROTO); + ntohs(e->ethproto), e->invflags & EBT_IPROTO); if (ret < 0) { module_put(match->me); return ret; @@ -422,7 +418,7 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, par->target = watcher; par->targinfo = w->data; ret = xt_check_target(par, w->watcher_size, - e->ethproto, e->invflags & EBT_IPROTO); + ntohs(e->ethproto), e->invflags & EBT_IPROTO); if (ret < 0) { module_put(watcher->me); return ret; @@ -466,8 +462,6 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, /* we make userspace set this right, * so there is no misunderstanding */ - BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " - "in distinguisher\n"); return -EINVAL; } if (i != NF_BR_NUMHOOKS) @@ -485,18 +479,14 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, offset += e->next_offset; } } - if (offset != limit) { - BUGPRINT("entries_size too small\n"); + if (offset != limit) return -EINVAL; - } /* check if all valid hooks have a chain */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i] && - (valid_hooks & (1 << i))) { - BUGPRINT("Valid hook without chain\n"); + (valid_hooks & (1 << i))) return -EINVAL; - } } return 0; } @@ -523,26 +513,20 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, /* this checks if the previous chain has as many entries * as it said it has */ - if (*n != *cnt) { - BUGPRINT("nentries does not equal the nr of entries " - "in the chain\n"); + if (*n != *cnt) return -EINVAL; - } + if (((struct ebt_entries *)e)->policy != EBT_DROP && ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { /* only RETURN from udc */ if (i != NF_BR_NUMHOOKS || - ((struct ebt_entries *)e)->policy != EBT_RETURN) { - BUGPRINT("bad policy\n"); + ((struct ebt_entries *)e)->policy != EBT_RETURN) return -EINVAL; - } } if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ (*udc_cnt)++; - if (((struct ebt_entries *)e)->counter_offset != *totalcnt) { - BUGPRINT("counter_offset != totalcnt"); + if (((struct ebt_entries *)e)->counter_offset != *totalcnt) return -EINVAL; - } *n = ((struct ebt_entries *)e)->nentries; *cnt = 0; return 0; @@ -550,15 +534,13 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, /* a plain old entry, heh */ if (sizeof(struct ebt_entry) > e->watchers_offset || e->watchers_offset > e->target_offset || - e->target_offset >= e->next_offset) { - BUGPRINT("entry offsets not in right order\n"); + e->target_offset >= e->next_offset) return -EINVAL; - } + /* this is not checked anywhere else */ - if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) { - BUGPRINT("target size too small\n"); + if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) return -EINVAL; - } + (*cnt)++; (*totalcnt)++; return 0; @@ -678,18 +660,15 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, if (e->bitmask == 0) return 0; - if (e->bitmask & ~EBT_F_MASK) { - BUGPRINT("Unknown flag for bitmask\n"); + if (e->bitmask & ~EBT_F_MASK) return -EINVAL; - } - if (e->invflags & ~EBT_INV_MASK) { - BUGPRINT("Unknown flag for inv bitmask\n"); + + if (e->invflags & ~EBT_INV_MASK) return -EINVAL; - } - if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) { - BUGPRINT("NOPROTO & 802_3 not allowed\n"); + + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) return -EINVAL; - } + /* what hook do we belong to? */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i]) @@ -748,13 +727,11 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, t->u.target = target; if (t->u.target == &ebt_standard_target) { if (gap < sizeof(struct ebt_standard_target)) { - BUGPRINT("Standard target size too big\n"); ret = -EFAULT; goto cleanup_watchers; } if (((struct ebt_standard_target *)t)->verdict < -NUM_STANDARD_TARGETS) { - BUGPRINT("Invalid standard target\n"); ret = -EFAULT; goto cleanup_watchers; } @@ -767,7 +744,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, tgpar.target = target; tgpar.targinfo = t->data; ret = xt_check_target(&tgpar, t->target_size, - e->ethproto, e->invflags & EBT_IPROTO); + ntohs(e->ethproto), e->invflags & EBT_IPROTO); if (ret < 0) { module_put(target->me); goto cleanup_watchers; @@ -813,10 +790,9 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack if (strcmp(t->u.name, EBT_STANDARD_TARGET)) goto letscontinue; if (e->target_offset + sizeof(struct ebt_standard_target) > - e->next_offset) { - BUGPRINT("Standard target size too big\n"); + e->next_offset) return -1; - } + verdict = ((struct ebt_standard_target *)t)->verdict; if (verdict >= 0) { /* jump to another chain */ struct ebt_entries *hlp2 = @@ -825,14 +801,12 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack if (hlp2 == cl_s[i].cs.chaininfo) break; /* bad destination or loop */ - if (i == udc_cnt) { - BUGPRINT("bad destination\n"); + if (i == udc_cnt) return -1; - } - if (cl_s[i].cs.n) { - BUGPRINT("loop\n"); + + if (cl_s[i].cs.n) return -1; - } + if (cl_s[i].hookmask & (1 << hooknr)) goto letscontinue; /* this can't be 0, so the loop test is correct */ @@ -865,24 +839,21 @@ static int translate_table(struct net *net, const char *name, i = 0; while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) i++; - if (i == NF_BR_NUMHOOKS) { - BUGPRINT("No valid hooks specified\n"); + if (i == NF_BR_NUMHOOKS) return -EINVAL; - } - if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) { - BUGPRINT("Chains don't start at beginning\n"); + + if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) return -EINVAL; - } + /* make sure chains are ordered after each other in same order * as their corresponding hooks */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; - if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) { - BUGPRINT("Hook order must be followed\n"); + if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) return -EINVAL; - } + i = j; } @@ -900,15 +871,11 @@ static int translate_table(struct net *net, const char *name, if (ret != 0) return ret; - if (i != j) { - BUGPRINT("nentries does not equal the nr of entries in the " - "(last) chain\n"); + if (i != j) return -EINVAL; - } - if (k != newinfo->nentries) { - BUGPRINT("Total nentries is wrong\n"); + + if (k != newinfo->nentries) return -EINVAL; - } /* get the location of the udc, put them in an array * while we're at it, allocate the chainstack @@ -942,7 +909,6 @@ static int translate_table(struct net *net, const char *name, ebt_get_udc_positions, newinfo, &i, cl_s); /* sanity check */ if (i != udc_cnt) { - BUGPRINT("i != udc_cnt\n"); vfree(cl_s); return -EFAULT; } @@ -1042,7 +1008,6 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_unlock; if (repl->num_counters && repl->num_counters != t->private->nentries) { - BUGPRINT("Wrong nr. of counters requested\n"); ret = -EINVAL; goto free_unlock; } @@ -1118,15 +1083,12 @@ static int do_replace(struct net *net, const void __user *user, if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.entries_size) { - BUGPRINT("Wrong len argument\n"); + if (len != sizeof(tmp) + tmp.entries_size) return -EINVAL; - } - if (tmp.entries_size == 0) { - BUGPRINT("Entries_size never zero\n"); + if (tmp.entries_size == 0) return -EINVAL; - } + /* overflow check */ if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) @@ -1153,7 +1115,6 @@ static int do_replace(struct net *net, const void __user *user, } if (copy_from_user( newinfo->entries, tmp.entries, tmp.entries_size) != 0) { - BUGPRINT("Couldn't copy entries from userspace\n"); ret = -EFAULT; goto free_entries; } @@ -1194,10 +1155,8 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, if (input_table == NULL || (repl = input_table->table) == NULL || repl->entries == NULL || repl->entries_size == 0 || - repl->counters != NULL || input_table->private != NULL) { - BUGPRINT("Bad table data for ebt_register_table!!!\n"); + repl->counters != NULL || input_table->private != NULL) return -EINVAL; - } /* Don't add one table to multiple lists. */ table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); @@ -1235,13 +1194,10 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, ((char *)repl->hook_entry[i] - repl->entries); } ret = translate_table(net, repl->name, newinfo); - if (ret != 0) { - BUGPRINT("Translate_table failed\n"); + if (ret != 0) goto free_chainstack; - } if (table->check && table->check(newinfo, table->valid_hooks)) { - BUGPRINT("The table doesn't like its own initial data, lol\n"); ret = -EINVAL; goto free_chainstack; } @@ -1252,7 +1208,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; - BUGPRINT("Table name already exists\n"); goto free_unlock; } } @@ -1320,7 +1275,6 @@ static int do_update_counters(struct net *net, const char *name, goto free_tmp; if (num_counters != t->private->nentries) { - BUGPRINT("Wrong nr of counters\n"); ret = -EINVAL; goto unlock_mutex; } @@ -1447,10 +1401,8 @@ static int copy_counters_to_user(struct ebt_table *t, if (num_counters == 0) return 0; - if (num_counters != nentries) { - BUGPRINT("Num_counters wrong\n"); + if (num_counters != nentries) return -EINVAL; - } counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp))); if (!counterstmp) @@ -1496,15 +1448,11 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; - if (tmp.nentries != nentries) { - BUGPRINT("Nentries wrong\n"); + if (tmp.nentries != nentries) return -EINVAL; - } - if (tmp.entries_size != entries_size) { - BUGPRINT("Wrong size\n"); + if (tmp.entries_size != entries_size) return -EINVAL; - } ret = copy_counters_to_user(t, oldcounters, tmp.counters, tmp.num_counters, nentries); @@ -1576,7 +1524,6 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } mutex_unlock(&ebt_mutex); if (copy_to_user(user, &tmp, *len) != 0) { - BUGPRINT("c2u Didn't work\n"); ret = -EFAULT; break; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 80f72cc5ca8d..c98391d49200 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -94,50 +94,7 @@ config NF_REJECT_IPV4 tristate "IPv4 packet rejection" default m if NETFILTER_ADVANCED=n -config NF_NAT_IPV4 - tristate "IPv4 NAT" - depends on NF_CONNTRACK - default m if NETFILTER_ADVANCED=n - select NF_NAT - help - The IPv4 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. This can be - controlled by iptables or nft. - -if NF_NAT_IPV4 - -config NF_NAT_MASQUERADE_IPV4 - bool - -if NF_TABLES -config NFT_CHAIN_NAT_IPV4 - depends on NF_TABLES_IPV4 - tristate "IPv4 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv4 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - -config NFT_MASQ_IPV4 - tristate "IPv4 masquerading support for nf_tables" - depends on NF_TABLES_IPV4 - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV4 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV4 - tristate "IPv4 redirect support for nf_tables" - depends on NF_TABLES_IPV4 - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. -endif # NF_TABLES - +if NF_NAT config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" depends on NF_CONNTRACK_SNMP @@ -166,7 +123,7 @@ config NF_NAT_H323 depends on NF_CONNTRACK default NF_CONNTRACK_H323 -endif # NF_NAT_IPV4 +endif # NF_NAT config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" @@ -263,7 +220,6 @@ config IP_NF_NAT depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n select NF_NAT - select NF_NAT_IPV4 select NETFILTER_XT_NAT help This enables the `nat' table in iptables. This allows masquerading, @@ -276,7 +232,7 @@ if IP_NF_NAT config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE_IPV4 + select NF_NAT_MASQUERADE default m if NETFILTER_ADVANCED=n help Masquerading is a special case of NAT: all outgoing connections are diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index fd7122e0e2c9..e241f5188ebe 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -3,10 +3,6 @@ # Makefile for the netfilter modules on top of IPv4. # -nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o -nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o -obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o - # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o @@ -29,11 +25,8 @@ $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o -obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o -obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o -obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o # flow table support diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index a317445448bf..007da0882412 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -15,8 +15,6 @@ #include <net/ip.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> static int __net_init iptable_nat_table_init(struct net *net); @@ -70,10 +68,10 @@ static int ipt_nat_register_lookups(struct net *net) int i, ret; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) { - ret = nf_nat_l3proto_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]); + ret = nf_nat_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]); if (ret) { while (i) - nf_nat_l3proto_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]); + nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]); return ret; } @@ -87,7 +85,7 @@ static void ipt_nat_unregister_lookups(struct net *net) int i; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) - nf_nat_l3proto_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]); + nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]); } static int __net_init iptable_nat_table_init(struct net *net) diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c deleted file mode 100644 index 4b07eb8a9b18..000000000000 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/icmp.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <net/secure_seq.h> -#include <net/checksum.h> -#include <net/route.h> -#include <net/ip.h> - -#include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; - -#ifdef CONFIG_XFRM -static void nf_nat_ipv4_decode_session(struct sk_buff *skb, - const struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned long statusbit, - struct flowi *fl) -{ - const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; - struct flowi4 *fl4 = &fl->u.ip4; - - if (ct->status & statusbit) { - fl4->daddr = t->dst.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_dport = t->dst.u.all; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl4->saddr = t->src.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_sport = t->src.u.all; - } -} -#endif /* CONFIG_XFRM */ - -static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct iphdr *iph; - unsigned int hdroff; - - if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) - return false; - - iph = (void *)skb->data + iphdroff; - hdroff = iphdroff + iph->ihl * 4; - - if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, - hdroff, target, maniptype)) - return false; - iph = (void *)skb->data + iphdroff; - - if (maniptype == NF_NAT_MANIP_SRC) { - csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); - iph->saddr = target->src.u3.ip; - } else { - csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); - iph->daddr = target->dst.u3.ip; - } - return true; -} - -static void nf_nat_ipv4_csum_update(struct sk_buff *skb, - unsigned int iphdroff, __sum16 *check, - const struct nf_conntrack_tuple *t, - enum nf_nat_manip_type maniptype) -{ - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - __be32 oldip, newip; - - if (maniptype == NF_NAT_MANIP_SRC) { - oldip = iph->saddr; - newip = t->src.u3.ip; - } else { - oldip = iph->daddr; - newip = t->dst.u3.ip; - } - inet_proto_csum_replace4(check, skb, oldip, newip, true); -} - -static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, - u8 proto, void *data, __sum16 *check, - int datalen, int oldlen) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) { - const struct iphdr *iph = ip_hdr(skb); - - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + - ip_hdrlen(skb); - skb->csum_offset = (void *)check - data; - *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, - proto, 0); - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), true); -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range) -{ - if (tb[CTA_NAT_V4_MINIP]) { - range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); - range->flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (tb[CTA_NAT_V4_MAXIP]) - range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); - else - range->max_addr.ip = range->min_addr.ip; - - return 0; -} -#endif - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { - .l3proto = NFPROTO_IPV4, - .manip_pkt = nf_nat_ipv4_manip_pkt, - .csum_update = nf_nat_ipv4_csum_update, - .csum_recalc = nf_nat_ipv4_csum_recalc, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, -#endif -#ifdef CONFIG_XFRM - .decode_session = nf_nat_ipv4_decode_session, -#endif -}; - -int nf_nat_icmp_reply_translation(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum) -{ - struct { - struct icmphdr icmp; - struct iphdr ip; - } *inside; - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - unsigned int hdrlen = ip_hdrlen(skb); - struct nf_conntrack_tuple target; - unsigned long statusbit; - - WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); - - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) - return 0; - if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) - return 0; - - inside = (void *)skb->data + hdrlen; - if (inside->icmp.type == ICMP_REDIRECT) { - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - if (ct->status & IPS_NAT_MASK) - return 0; - } - - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply direction */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - /* Reloading "inside" here since manip_pkt may reallocate */ - inside = (void *)skb->data + hdrlen; - inside->icmp.checksum = 0; - inside->icmp.checksum = - csum_fold(skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } - - /* Change outer to look like the reply to an incoming packet */ - nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); - target.dst.protonum = IPPROTO_ICMP; - if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); - -static unsigned int -nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return NF_ACCEPT; - - if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - state->hook)) - return NF_DROP; - else - return NF_ACCEPT; - } - } - - return nf_nat_inet_fn(priv, skb, state); -} - -static unsigned int -nf_nat_ipv4_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - __be32 daddr = ip_hdr(skb)->daddr; - - ret = nf_nat_ipv4_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(skb)->daddr) - skb_dst_drop(skb); - - return ret; -} - -static unsigned int -nf_nat_ipv4_out(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - int err; -#endif - unsigned int ret; - - ret = nf_nat_ipv4_fn(priv, skb, state); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if ((ct->tuplehash[dir].tuple.src.u3.ip != - ct->tuplehash[!dir].tuple.dst.u3.ip) || - (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && - ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } -#endif - return ret; -} - -static unsigned int -nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - int err; - - ret = nf_nat_ipv4_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.dst.u3.ip != - ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#ifdef CONFIG_XFRM - else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && - ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#endif - } - return ret; -} - -static const struct nf_hook_ops nf_nat_ipv4_ops[] = { - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv4_in, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv4_out, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC, - }, - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv4_local_fn, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv4_fn, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SRC, - }, -}; - -int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn); - -void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn); - -static int __init nf_nat_l3proto_ipv4_init(void) -{ - return nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); -} - -static void __exit nf_nat_l3proto_ipv4_exit(void) -{ - nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); -} - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); - -module_init(nf_nat_l3proto_ipv4_init); -module_exit(nf_nat_l3proto_ipv4_exit); diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c deleted file mode 100644 index a3c4ea303e3e..000000000000 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> - * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> - * Copyright (c) 2012 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables_ipv4.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/ip.h> - -static unsigned int nft_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv4(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static int nft_nat_ipv4_reg(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_l3proto_ipv4_register_fn(net, ops); -} - -static void nft_nat_ipv4_unreg(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_l3proto_ipv4_unregister_fn(net, ops); -} - -static const struct nft_chain_type nft_chain_nat_ipv4 = { - .name = "nat", - .type = NFT_CHAIN_T_NAT, - .family = NFPROTO_IPV4, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .hooks = { - [NF_INET_PRE_ROUTING] = nft_nat_do_chain, - [NF_INET_POST_ROUTING] = nft_nat_do_chain, - [NF_INET_LOCAL_OUT] = nft_nat_do_chain, - [NF_INET_LOCAL_IN] = nft_nat_do_chain, - }, - .ops_register = nft_nat_ipv4_reg, - .ops_unregister = nft_nat_ipv4_unreg, -}; - -static int __init nft_chain_nat_init(void) -{ - nft_register_chain_type(&nft_chain_nat_ipv4); - - return 0; -} - -static void __exit nft_chain_nat_exit(void) -{ - nft_unregister_chain_type(&nft_chain_nat_ipv4); -} - -module_init(nft_chain_nat_init); -module_exit(nft_chain_nat_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c deleted file mode 100644 index 6847de1d1db8..000000000000 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nft_masq.h> -#include <net/netfilter/ipv4/nf_nat_masquerade.h> - -static void nft_masq_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), - &range, nft_out(pkt)); -} - -static void -nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV4); -} - -static struct nft_expr_type nft_masq_ipv4_type; -static const struct nft_expr_ops nft_masq_ipv4_ops = { - .type = &nft_masq_ipv4_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv4_eval, - .init = nft_masq_init, - .destroy = nft_masq_ipv4_destroy, - .dump = nft_masq_dump, - .validate = nft_masq_validate, -}; - -static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { - .family = NFPROTO_IPV4, - .name = "masq", - .ops = &nft_masq_ipv4_ops, - .policy = nft_masq_policy, - .maxattr = NFTA_MASQ_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_masq_ipv4_module_init(void) -{ - int ret; - - ret = nft_register_expr(&nft_masq_ipv4_type); - if (ret < 0) - return ret; - - ret = nf_nat_masquerade_ipv4_register_notifier(); - if (ret) - nft_unregister_expr(&nft_masq_ipv4_type); - - return ret; -} - -static void __exit nft_masq_ipv4_module_exit(void) -{ - nft_unregister_expr(&nft_masq_ipv4_type); - nf_nat_masquerade_ipv4_unregister_notifier(); -} - -module_init(nft_masq_ipv4_module_init); -module_exit(nft_masq_ipv4_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq"); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c deleted file mode 100644 index 5120be1d3118..000000000000 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_redirect.h> -#include <net/netfilter/nft_redir.h> - -static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_ipv4_multi_range_compat mr; - - memset(&mr, 0, sizeof(mr)); - if (priv->sreg_proto_min) { - mr.range[0].min.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - mr.range[0].max.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - mr.range[0].flags |= priv->flags; - - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); -} - -static void -nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV4); -} - -static struct nft_expr_type nft_redir_ipv4_type; -static const struct nft_expr_ops nft_redir_ipv4_ops = { - .type = &nft_redir_ipv4_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv4_eval, - .init = nft_redir_init, - .destroy = nft_redir_ipv4_destroy, - .dump = nft_redir_dump, - .validate = nft_redir_validate, -}; - -static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { - .family = NFPROTO_IPV4, - .name = "redir", - .ops = &nft_redir_ipv4_ops, - .policy = nft_redir_policy, - .maxattr = NFTA_REDIR_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_redir_ipv4_module_init(void) -{ - return nft_register_expr(&nft_redir_ipv4_type); -} - -static void __exit nft_redir_ipv4_module_exit(void) -{ - nft_unregister_expr(&nft_redir_ipv4_type); -} - -module_init(nft_redir_ipv4_module_init); -module_exit(nft_redir_ipv4_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 339d0762b027..ddc99a1653aa 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -31,34 +31,6 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -if NF_NAT_IPV6 - -config NFT_CHAIN_NAT_IPV6 - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - -config NFT_MASQ_IPV6 - tristate "IPv6 masquerade support for nf_tables" - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV6 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV6 - tristate "IPv6 redirect support for nf_tables" - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. - -endif # NF_NAT_IPV6 - config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -106,23 +78,6 @@ config NF_LOG_IPV6 default m if NETFILTER_ADVANCED=n select NF_LOG_COMMON -config NF_NAT_IPV6 - tristate "IPv6 NAT" - depends on NF_CONNTRACK - depends on NETFILTER_ADVANCED - select NF_NAT - help - The IPv6 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. This can be - controlled by iptables or nft. - -if NF_NAT_IPV6 - -config NF_NAT_MASQUERADE_IPV6 - bool - -endif # NF_NAT_IPV6 - config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -311,7 +266,6 @@ config IP6_NF_NAT depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_NAT - select NF_NAT_IPV6 select NETFILTER_XT_NAT help This enables the `nat' table in ip6tables. This allows masquerading, @@ -324,7 +278,7 @@ if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE_IPV6 + select NF_NAT_MASQUERADE help Masquerading is a special case of NAT: all outgoing connections are changed to seem to come from a particular interface's address, and diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ea43d5256e0..3853c648ebaa 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,10 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o -nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o -nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o -obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o - # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o @@ -32,10 +28,7 @@ obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o -obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o -obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o -obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 67ba70ab9f5c..3e1fab9d7503 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -17,8 +17,6 @@ #include <net/ipv6.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> static int __net_init ip6table_nat_table_init(struct net *net); @@ -72,10 +70,10 @@ static int ip6t_nat_register_lookups(struct net *net) int i, ret; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) { - ret = nf_nat_l3proto_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]); + ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]); if (ret) { while (i) - nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]); + nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]); return ret; } @@ -89,7 +87,7 @@ static void ip6t_nat_unregister_lookups(struct net *net) int i; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) - nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]); + nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]); } static int __net_init ip6table_nat_table_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c deleted file mode 100644 index 490bfd3c9162..000000000000 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of IPv6 NAT funded by Astaro. - */ -#include <linux/types.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <net/secure_seq.h> -#include <net/checksum.h> -#include <net/ip6_checksum.h> -#include <net/ip6_route.h> -#include <net/xfrm.h> -#include <net/ipv6.h> - -#include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; - -#ifdef CONFIG_XFRM -static void nf_nat_ipv6_decode_session(struct sk_buff *skb, - const struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned long statusbit, - struct flowi *fl) -{ - const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; - struct flowi6 *fl6 = &fl->u.ip6; - - if (ct->status & statusbit) { - fl6->daddr = t->dst.u3.in6; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl6->fl6_dport = t->dst.u.all; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl6->saddr = t->src.u3.in6; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl6->fl6_sport = t->src.u.all; - } -} -#endif - -static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct ipv6hdr *ipv6h; - __be16 frag_off; - int hdroff; - u8 nexthdr; - - if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) - return false; - - ipv6h = (void *)skb->data + iphdroff; - nexthdr = ipv6h->nexthdr; - hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), - &nexthdr, &frag_off); - if (hdroff < 0) - goto manip_addr; - - if ((frag_off & htons(~0x7)) == 0 && - !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, - target, maniptype)) - return false; - - /* must reload, offset might have changed */ - ipv6h = (void *)skb->data + iphdroff; - -manip_addr: - if (maniptype == NF_NAT_MANIP_SRC) - ipv6h->saddr = target->src.u3.in6; - else - ipv6h->daddr = target->dst.u3.in6; - - return true; -} - -static void nf_nat_ipv6_csum_update(struct sk_buff *skb, - unsigned int iphdroff, __sum16 *check, - const struct nf_conntrack_tuple *t, - enum nf_nat_manip_type maniptype) -{ - const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); - const struct in6_addr *oldip, *newip; - - if (maniptype == NF_NAT_MANIP_SRC) { - oldip = &ipv6h->saddr; - newip = &t->src.u3.in6; - } else { - oldip = &ipv6h->daddr; - newip = &t->dst.u3.in6; - } - inet_proto_csum_replace16(check, skb, oldip->s6_addr32, - newip->s6_addr32, true); -} - -static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, - u8 proto, void *data, __sum16 *check, - int datalen, int oldlen) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + - (data - (void *)skb->data); - skb->csum_offset = (void *)check - data; - *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - datalen, proto, 0); - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), true); -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range) -{ - if (tb[CTA_NAT_V6_MINIP]) { - nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], - sizeof(struct in6_addr)); - range->flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (tb[CTA_NAT_V6_MAXIP]) - nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], - sizeof(struct in6_addr)); - else - range->max_addr = range->min_addr; - - return 0; -} -#endif - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { - .l3proto = NFPROTO_IPV6, - .manip_pkt = nf_nat_ipv6_manip_pkt, - .csum_update = nf_nat_ipv6_csum_update, - .csum_recalc = nf_nat_ipv6_csum_recalc, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, -#endif -#ifdef CONFIG_XFRM - .decode_session = nf_nat_ipv6_decode_session, -#endif -}; - -int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - unsigned int hdrlen) -{ - struct { - struct icmp6hdr icmp6; - struct ipv6hdr ip6; - } *inside; - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - struct nf_conntrack_tuple target; - unsigned long statusbit; - - WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); - - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) - return 0; - if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) - return 0; - - inside = (void *)skb->data + hdrlen; - if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - if (ct->status & IPS_NAT_MASK) - return 0; - } - - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply direction */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - inside = (void *)skb->data + hdrlen; - inside->icmp6.icmp6_cksum = 0; - inside->icmp6.icmp6_cksum = - csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - skb->len - hdrlen, IPPROTO_ICMPV6, - skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } - - nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); - target.dst.protonum = IPPROTO_ICMPV6; - if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); - -static unsigned int -nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be16 frag_off; - int hdrlen; - u8 nexthdr; - - ct = nf_ct_get(skb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - * have dropped it. Hence it's the user's responsibilty to - * packet filter it out, or implement conntrack/NAT for that - * protocol. 8) --RR - */ - if (!ct) - return NF_ACCEPT; - - if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { - nexthdr = ipv6_hdr(skb)->nexthdr; - hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - state->hook, - hdrlen)) - return NF_DROP; - else - return NF_ACCEPT; - } - } - - return nf_nat_inet_fn(priv, skb, state); -} - -static unsigned int -nf_nat_ipv6_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct in6_addr daddr = ipv6_hdr(skb)->daddr; - - ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) - skb_dst_drop(skb); - - return ret; -} - -static unsigned int -nf_nat_ipv6_out(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - int err; -#endif - unsigned int ret; - - ret = nf_nat_ipv6_fn(priv, skb, state); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3) || - (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } -#endif - return ret; -} - -static int nat_route_me_harder(struct net *net, struct sk_buff *skb) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->route_me_harder(net, skb); -#else - return ip6_route_me_harder(net, skb); -#endif -} - -static unsigned int -nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - int err; - - ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, - &ct->tuplehash[!dir].tuple.src.u3)) { - err = nat_route_me_harder(state->net, skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#ifdef CONFIG_XFRM - else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#endif - } - return ret; -} - -static const struct nf_hook_ops nf_nat_ipv6_ops[] = { - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv6_in, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv6_out, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC, - }, - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv6_local_fn, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv6_fn, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_NAT_SRC, - }, -}; - -int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn); - -void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn); - -static int __init nf_nat_l3proto_ipv6_init(void) -{ - return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); -} - -static void __exit nf_nat_l3proto_ipv6_exit(void) -{ - nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); -} - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); - -module_init(nf_nat_l3proto_ipv6_init); -module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c deleted file mode 100644 index fd313b726263..000000000000 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include <linux/kernel.h> -#include <linux/atomic.h> -#include <linux/netdevice.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <net/netfilter/nf_nat.h> -#include <net/addrconf.h> -#include <net/ipv6.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -#define MAX_WORK_COUNT 16 - -static atomic_t v6_worker_count; - -static int -nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, - const struct in6_addr *daddr, unsigned int srcprefs, - struct in6_addr *saddr) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); -#else - return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); -#endif -} - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn_nat *nat; - struct in6_addr src; - struct nf_conn *ct; - struct nf_nat_range2 newrange; - - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY))); - - if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) - return NF_DROP; - - nat = nf_ct_nat_ext_add(ct); - if (nat) - nat->masq_index = out->ifindex; - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = src; - newrange.max_addr.in6 = src; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); - -static int device_cmp(struct nf_conn *ct, void *ifindex) -{ - const struct nf_conn_nat *nat = nfct_nat(ct); - - if (!nat) - return 0; - if (nf_ct_l3num(ct) != NFPROTO_IPV6) - return 0; - return nat->masq_index == (int)(long)ifindex; -} - -static int masq_device_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - const struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net *net = dev_net(dev); - - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup_net(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); - - return NOTIFY_DONE; -} - -static struct notifier_block masq_dev_notifier = { - .notifier_call = masq_device_event, -}; - -struct masq_dev_work { - struct work_struct work; - struct net *net; - struct in6_addr addr; - int ifindex; -}; - -static int inet_cmp(struct nf_conn *ct, void *work) -{ - struct masq_dev_work *w = (struct masq_dev_work *)work; - struct nf_conntrack_tuple *tuple; - - if (!device_cmp(ct, (void *)(long)w->ifindex)) - return 0; - - tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); -} - -static void iterate_cleanup_work(struct work_struct *work) -{ - struct masq_dev_work *w; - - w = container_of(work, struct masq_dev_work, work); - - nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); - - put_net(w->net); - kfree(w); - atomic_dec(&v6_worker_count); - module_put(THIS_MODULE); -} - -/* ipv6 inet notifier is an atomic notifier, i.e. we cannot - * schedule. - * - * Unfortunately, nf_ct_iterate_cleanup_net can run for a long - * time if there are lots of conntracks and the system - * handles high softirq load, so it frequently calls cond_resched - * while iterating the conntrack table. - * - * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue. - * - * As we can have 'a lot' of inet_events (depending on amount - * of ipv6 addresses being deleted), we also need to add an upper - * limit to the number of queued work items. - */ -static int masq_inet6_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct inet6_ifaddr *ifa = ptr; - const struct net_device *dev; - struct masq_dev_work *w; - struct net *net; - - if (event != NETDEV_DOWN || - atomic_read(&v6_worker_count) >= MAX_WORK_COUNT) - return NOTIFY_DONE; - - dev = ifa->idev->dev; - net = maybe_get_net(dev_net(dev)); - if (!net) - return NOTIFY_DONE; - - if (!try_module_get(THIS_MODULE)) - goto err_module; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - atomic_inc(&v6_worker_count); - - INIT_WORK(&w->work, iterate_cleanup_work); - w->ifindex = dev->ifindex; - w->net = net; - w->addr = ifa->addr; - schedule_work(&w->work); - - return NOTIFY_DONE; - } - - module_put(THIS_MODULE); - err_module: - put_net(net); - return NOTIFY_DONE; -} - -static struct notifier_block masq_inet6_notifier = { - .notifier_call = masq_inet6_event, -}; - -static int masq_refcnt; -static DEFINE_MUTEX(masq_mutex); - -int nf_nat_masquerade_ipv6_register_notifier(void) -{ - int ret = 0; - - mutex_lock(&masq_mutex); - /* check if the notifier is already set */ - if (++masq_refcnt > 1) - goto out_unlock; - - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; - - ret = register_inet6addr_notifier(&masq_inet6_notifier); - if (ret) - goto err_unregister; - - mutex_unlock(&masq_mutex); - return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); -err_dec: - masq_refcnt--; -out_unlock: - mutex_unlock(&masq_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); - -void nf_nat_masquerade_ipv6_unregister_notifier(void) -{ - mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt > 0) - goto out_unlock; - - unregister_inet6addr_notifier(&masq_inet6_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); -out_unlock: - mutex_unlock(&masq_mutex); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c deleted file mode 100644 index 8a081ad7d5db..000000000000 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * Copyright (c) 2012 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables_ipv6.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/ipv6.h> - -static unsigned int nft_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static int nft_nat_ipv6_reg(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_l3proto_ipv6_register_fn(net, ops); -} - -static void nft_nat_ipv6_unreg(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_l3proto_ipv6_unregister_fn(net, ops); -} - -static const struct nft_chain_type nft_chain_nat_ipv6 = { - .name = "nat", - .type = NFT_CHAIN_T_NAT, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .hooks = { - [NF_INET_PRE_ROUTING] = nft_nat_do_chain, - [NF_INET_POST_ROUTING] = nft_nat_do_chain, - [NF_INET_LOCAL_OUT] = nft_nat_do_chain, - [NF_INET_LOCAL_IN] = nft_nat_do_chain, - }, - .ops_register = nft_nat_ipv6_reg, - .ops_unregister = nft_nat_ipv6_unreg, -}; - -static int __init nft_chain_nat_ipv6_init(void) -{ - nft_register_chain_type(&nft_chain_nat_ipv6); - - return 0; -} - -static void __exit nft_chain_nat_ipv6_exit(void) -{ - nft_unregister_chain_type(&nft_chain_nat_ipv6); -} - -module_init(nft_chain_nat_ipv6_init); -module_exit(nft_chain_nat_ipv6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c deleted file mode 100644 index e06c82e9dfcd..000000000000 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_masq.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, - nft_out(pkt)); -} - -static void -nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV6); -} - -static struct nft_expr_type nft_masq_ipv6_type; -static const struct nft_expr_ops nft_masq_ipv6_ops = { - .type = &nft_masq_ipv6_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv6_eval, - .init = nft_masq_init, - .destroy = nft_masq_ipv6_destroy, - .dump = nft_masq_dump, - .validate = nft_masq_validate, -}; - -static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { - .family = NFPROTO_IPV6, - .name = "masq", - .ops = &nft_masq_ipv6_ops, - .policy = nft_masq_policy, - .maxattr = NFTA_MASQ_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_masq_ipv6_module_init(void) -{ - int ret; - - ret = nft_register_expr(&nft_masq_ipv6_type); - if (ret < 0) - return ret; - - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret) - nft_unregister_expr(&nft_masq_ipv6_type); - - return ret; -} - -static void __exit nft_masq_ipv6_module_exit(void) -{ - nft_unregister_expr(&nft_masq_ipv6_type); - nf_nat_masquerade_ipv6_unregister_notifier(); -} - -module_init(nft_masq_ipv6_module_init); -module_exit(nft_masq_ipv6_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c deleted file mode 100644 index 74269865acc8..000000000000 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_redir.h> -#include <net/netfilter/nf_nat_redirect.h> - -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - -static void -nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV6); -} - -static struct nft_expr_type nft_redir_ipv6_type; -static const struct nft_expr_ops nft_redir_ipv6_ops = { - .type = &nft_redir_ipv6_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, - .init = nft_redir_init, - .destroy = nft_redir_ipv6_destroy, - .dump = nft_redir_dump, - .validate = nft_redir_validate, -}; - -static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { - .family = NFPROTO_IPV6, - .name = "redir", - .ops = &nft_redir_ipv6_ops, - .policy = nft_redir_policy, - .maxattr = NFTA_REDIR_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_redir_ipv6_module_init(void) -{ - return nft_register_expr(&nft_redir_ipv6_type); -} - -static void __exit nft_redir_ipv6_module_exit(void) -{ - nft_unregister_expr(&nft_redir_ipv6_type); -} - -module_init(nft_redir_ipv6_module_init); -module_exit(nft_redir_ipv6_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index fefd63a243f2..d43ffb09939b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -396,7 +396,13 @@ config NETFILTER_NETLINK_GLUE_CT the enqueued via NFNETLINK. config NF_NAT - tristate + tristate "Network Address Translation support" + depends on NF_CONNTRACK + default m if NETFILTER_ADVANCED=n + help + The NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. This can be + controlled by iptables, ip6tables or nft. config NF_NAT_NEEDED bool @@ -431,6 +437,9 @@ config NF_NAT_TFTP config NF_NAT_REDIRECT bool +config NF_NAT_MASQUERADE + bool + config NETFILTER_SYNPROXY tristate @@ -523,6 +532,7 @@ config NFT_LIMIT config NFT_MASQ depends on NF_CONNTRACK depends on NF_NAT + select NF_NAT_MASQUERADE tristate "Netfilter nf_tables masquerade support" help This option adds the "masquerade" expression that you can use @@ -532,6 +542,7 @@ config NFT_REDIR depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables redirect support" + select NF_NAT_REDIRECT help This options adds the "redirect" expression that you can use to perform NAT in the redirect flavour. @@ -539,6 +550,7 @@ config NFT_REDIR config NFT_NAT depends on NF_CONNTRACK select NF_NAT + depends on NF_TABLES_IPV4 || NF_TABLES_IPV6 tristate "Netfilter nf_tables nat module" help This option adds the "nat" expression that you can use to perform diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index e66067befa42..4894a85cdd0b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o obj-$(CONFIG_NF_NAT) += nf_nat.o nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o +nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o @@ -109,6 +110,8 @@ obj-$(CONFIG_NFT_OSF) += nft_osf.o obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o +obj-$(CONFIG_NFT_NAT) += nft_chain_nat.o + # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2a3d4e27cf3b..053cd96b9c76 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3086,7 +3086,7 @@ static bool ip_vs_is_af_valid(int af) static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *usvc, - struct nlattr *nla, int full_entry, + struct nlattr *nla, bool full_entry, struct ip_vs_service **ret_svc) { struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; @@ -3173,7 +3173,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -3283,7 +3283,7 @@ out_err: } static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, - struct nlattr *nla, int full_entry) + struct nlattr *nla, bool full_entry) { struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; struct nlattr *nla_addr, *nla_port; @@ -3545,11 +3545,11 @@ out: static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) { + bool need_full_svc = false, need_full_dest = false; struct ip_vs_service *svc = NULL; struct ip_vs_service_user_kern usvc; struct ip_vs_dest_user_kern udest; int ret = 0, cmd; - int need_full_svc = 0, need_full_dest = 0; struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); @@ -3573,7 +3573,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) * received a valid one. We need a full service specification when * adding / editing a service. Only identifying members otherwise. */ if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) - need_full_svc = 1; + need_full_svc = true; ret = ip_vs_genl_parse_service(ipvs, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], @@ -3593,7 +3593,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || cmd == IPVS_CMD_DEL_DEST) { if (cmd != IPVS_CMD_DEL_DEST) - need_full_dest = 1; + need_full_dest = true; ret = ip_vs_genl_parse_dest(&udest, info->attrs[IPVS_CMD_ATTR_DEST], diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4398a72edec5..fe69d46ff779 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -124,7 +124,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, } s = data + plen; if (skip) { - int found = 0; + bool found = false; for (;; s++) { if (s == data_limit) @@ -136,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (!ext && isdigit(*s)) break; if (*s == skip) - found = 1; + found = true; } else if (*s != skip) { break; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index bc3d1625ecc8..b58ddb7dffd1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -186,7 +186,7 @@ static int sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { unsigned int sctphoff; - struct sctphdr *sh, _sctph; + struct sctphdr *sh; __le32 cmp, val; #ifdef CONFIG_IP_VS_IPV6 @@ -196,10 +196,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) #endif sctphoff = ip_hdrlen(skb); - sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); - if (sh == NULL) - return 0; - + sh = (struct sctphdr *)(skb->data + sctphoff); cmp = sh->checksum; val = sctp_compute_cksum(skb, sctphoff); diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 479419759983..00ce07dda980 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -153,8 +153,8 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { struct tcphdr *tcph; unsigned int tcphoff = iph->len; + bool payload_csum = false; int oldlen; - int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -180,7 +180,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (ret == 1) oldlen = skb->len - tcphoff; else - payload_csum = 1; + payload_csum = true; } tcph = (void *)skb_network_header(skb) + tcphoff; @@ -231,8 +231,8 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { struct tcphdr *tcph; unsigned int tcphoff = iph->len; + bool payload_csum = false; int oldlen; - int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -261,7 +261,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (ret == 1) oldlen = skb->len - tcphoff; else - payload_csum = 1; + payload_csum = true; } tcph = (void *)skb_network_header(skb) + tcphoff; diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 646c384910fb..92c078abcb3e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -143,8 +143,8 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { struct udphdr *udph; unsigned int udphoff = iph->len; + bool payload_csum = false; int oldlen; - int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -172,7 +172,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (ret == 1) oldlen = skb->len - udphoff; else - payload_csum = 1; + payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; @@ -226,8 +226,8 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { struct udphdr *udph; unsigned int udphoff = iph->len; + bool payload_csum = false; int oldlen; - int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -256,7 +256,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (ret == 1) oldlen = skb->len - udphoff; else - payload_csum = 1; + payload_csum = true; } udph = (void *)skb_network_header(skb) + udphoff; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 473cce2a5231..175349fcf91f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -126,7 +126,7 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, { struct flowi4 fl4; struct rtable *rt; - int loop = 0; + bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -149,7 +149,7 @@ retry: ip_rt_put(rt); *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); - loop++; + loop = true; goto retry; } *saddr = fl4.saddr; diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 20edd589fe06..f2681ec5b5f6 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -54,6 +54,7 @@ enum amanda_strings { SEARCH_DATA, SEARCH_MESG, SEARCH_INDEX, + SEARCH_STATE, }; static struct { @@ -81,6 +82,10 @@ static struct { .string = "INDEX ", .len = 6, }, + [SEARCH_STATE] = { + .string = "STATE ", + .len = 6, + }, }; static int amanda_help(struct sk_buff *skb, @@ -124,7 +129,7 @@ static int amanda_help(struct sk_buff *skb, goto out; stop += start; - for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { + for (i = SEARCH_DATA; i <= SEARCH_STATE; i++) { off = skb_find_text(skb, start, stop, search[i].ts); if (off == UINT_MAX) continue; @@ -168,7 +173,7 @@ out: } static const struct nf_conntrack_expect_policy amanda_exp_policy = { - .max_expected = 3, + .max_expected = 4, .timeout = 180, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e139c256e269..82bfbeef46af 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -51,7 +51,6 @@ #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netns/hash.h> #include <net/ip.h> @@ -1752,11 +1751,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies, - int do_acct) + u32 extra_jiffies, + bool do_acct) { - WARN_ON(!skb); - /* Only update if this is not a fixed timeout */ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) goto acct; @@ -1765,7 +1762,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, if (nf_ct_is_confirmed(ct)) extra_jiffies += nfct_time_stamp; - ct->timeout = extra_jiffies; + if (ct->timeout != extra_jiffies) + ct->timeout = extra_jiffies; acct: if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); @@ -2482,6 +2480,7 @@ int nf_conntrack_init_net(struct net *net) int cpu; BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); + BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS); atomic_set(&net->ct.count, 0); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 349b42a65c8a..66c596d287a5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -46,7 +46,7 @@ #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> #ifdef CONFIG_NF_NAT_NEEDED -#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #endif diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 01c748fa8913..a06875a466a4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -828,6 +828,12 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static bool nf_conntrack_tcp_established(const struct nf_conn *ct) +{ + return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED && + test_bit(IPS_ASSURED_BIT, &ct->status); +} + /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -1030,16 +1036,38 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, new_state = TCP_CONNTRACK_ESTABLISHED; break; case TCP_CONNTRACK_CLOSE: - if (index == TCP_RST_SET - && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) - && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { - /* Invalid RST */ - spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); - return -NF_ACCEPT; + if (index != TCP_RST_SET) + break; + + if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) { + u32 seq = ntohl(th->seq); + + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { + /* Invalid RST */ + spin_unlock_bh(&ct->lock); + nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); + return -NF_ACCEPT; + } + + if (!nf_conntrack_tcp_established(ct) || + seq == ct->proto.tcp.seen[!dir].td_maxack) + break; + + /* Check if rst is part of train, such as + * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42 + * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42 + */ + if (ct->proto.tcp.last_index == TCP_ACK_SET && + ct->proto.tcp.last_dir == dir && + seq == ct->proto.tcp.last_end) + break; + + /* ... RST sequence number doesn't match exactly, keep + * established state to allow a possible challenge ACK. + */ + new_state = old_state; } - if (index == TCP_RST_SET - && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) + if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) @@ -1055,7 +1083,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, * segments we ignored. */ goto in_window; } - /* Just fall through */ + break; default: /* Keep compilers happy. */ break; @@ -1090,6 +1118,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; + else if (unlikely(index == TCP_RST_SET)) + timeout = timeouts[TCP_CONNTRACK_CLOSE]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 35e61038ae96..af7dc6537758 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -22,8 +22,6 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_seqadj.h> @@ -35,8 +33,6 @@ static spinlock_t nf_nat_locks[CONNTRACK_LOCKS]; static DEFINE_MUTEX(nf_nat_proto_mutex); -static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] - __read_mostly; static unsigned int nat_net_id __read_mostly; static struct hlist_head *nf_nat_bysource __read_mostly; @@ -58,16 +54,75 @@ struct nat_net { struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO]; }; -inline const struct nf_nat_l3proto * -__nf_nat_l3proto_find(u8 family) +#ifdef CONFIG_XFRM +static void nf_nat_ipv4_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) { - return rcu_dereference(nf_nat_l3protos[family]); + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi4 *fl4 = &fl->u.ip4; + + if (ct->status & statusbit) { + fl4->daddr = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl4->saddr = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_sport = t->src.u.all; + } +} + +static void nf_nat_ipv6_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ +#if IS_ENABLED(CONFIG_IPV6) + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi6 *fl6 = &fl->u.ip6; + + if (ct->status & statusbit) { + fl6->daddr = t->dst.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl6->saddr = t->src.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_sport = t->src.u.all; + } +#endif } -#ifdef CONFIG_XFRM static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) { - const struct nf_nat_l3proto *l3proto; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; enum ip_conntrack_dir dir; @@ -79,17 +134,20 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) return; family = nf_ct_l3num(ct); - l3proto = __nf_nat_l3proto_find(family); - if (l3proto == NULL) - return; - dir = CTINFO2DIR(ctinfo); if (dir == IP_CT_DIR_ORIGINAL) statusbit = IPS_DST_NAT; else statusbit = IPS_SRC_NAT; - l3proto->decode_session(skb, ct, dir, statusbit, fl); + switch (family) { + case NFPROTO_IPV4: + nf_nat_ipv4_decode_session(skb, ct, dir, statusbit, fl); + return; + case NFPROTO_IPV6: + nf_nat_ipv6_decode_session(skb, ct, dir, statusbit, fl); + return; + } } int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) @@ -182,7 +240,7 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, __be16 port; switch (tuple->dst.protonum) { - case IPPROTO_ICMP: /* fallthrough */ + case IPPROTO_ICMP: case IPPROTO_ICMPV6: return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); @@ -631,23 +689,6 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); -static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, - enum nf_nat_manip_type mtype, - enum ip_conntrack_dir dir) -{ - const struct nf_nat_l3proto *l3proto; - struct nf_conntrack_tuple target; - - /* We are aiming to look like inverse of other direction. */ - nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); - - l3proto = __nf_nat_l3proto_find(target.src.l3num); - if (!l3proto->manip_pkt(skb, 0, &target, mtype)) - return NF_DROP; - - return NF_ACCEPT; -} - /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -798,33 +839,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) return 0; } -static void nf_nat_l3proto_clean(u8 l3proto) -{ - struct nf_nat_proto_clean clean = { - .l3proto = l3proto, - }; - - nf_ct_iterate_destroy(nf_nat_proto_remove, &clean); -} - -int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) -{ - RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); - return 0; -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_register); - -void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto) -{ - mutex_lock(&nf_nat_proto_mutex); - RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL); - mutex_unlock(&nf_nat_proto_mutex); - synchronize_rcu(); - - nf_nat_l3proto_clean(l3proto->l3proto); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); - /* No one using conntrack by the time this called. */ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { @@ -887,10 +901,43 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; +static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range2 *range) +{ + if (tb[CTA_NAT_V4_MINIP]) { + range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V4_MAXIP]) + range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); + else + range->max_addr.ip = range->min_addr.ip; + + return 0; +} + +static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range2 *range) +{ + if (tb[CTA_NAT_V6_MINIP]) { + nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], + sizeof(struct in6_addr)); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V6_MAXIP]) + nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], + sizeof(struct in6_addr)); + else + range->max_addr = range->min_addr; + + return 0; +} + static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range2 *range, - const struct nf_nat_l3proto *l3proto) + const struct nf_conn *ct, struct nf_nat_range2 *range) { struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -901,8 +948,19 @@ nfnetlink_parse_nat(const struct nlattr *nat, if (err < 0) return err; - err = l3proto->nlattr_to_range(tb, range); - if (err < 0) + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + err = nf_nat_ipv4_nlattr_to_range(tb, range); + break; + case NFPROTO_IPV6: + err = nf_nat_ipv6_nlattr_to_range(tb, range); + break; + default: + err = -EPROTONOSUPPORT; + break; + } + + if (err) return err; if (!tb[CTA_NAT_PROTO]) @@ -918,7 +976,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, const struct nlattr *attr) { struct nf_nat_range2 range; - const struct nf_nat_l3proto *l3proto; int err; /* Should not happen, restricted to creating new conntracks @@ -927,18 +984,11 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, if (WARN_ON_ONCE(nf_nat_initialized(ct, manip))) return -EEXIST; - /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to - * attach the null binding, otherwise this may oops. - */ - l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); - if (l3proto == NULL) - return -EAGAIN; - /* No NAT information has been passed, allocate the null-binding */ if (attr == NULL) return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0; - err = nfnetlink_parse_nat(attr, ct, &range, l3proto); + err = nfnetlink_parse_nat(attr, ct, &range); if (err < 0) return err; @@ -1035,7 +1085,6 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, mutex_unlock(&nf_nat_proto_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_nat_register_fn); void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, unsigned int ops_count) @@ -1084,7 +1133,6 @@ void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, unlock: mutex_unlock(&nf_nat_proto_mutex); } -EXPORT_SYMBOL_GPL(nf_nat_unregister_fn); static struct pernet_operations nat_net_ops = { .id = &nat_net_id, diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 38793b95d9bc..ccc06f7539d7 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -22,9 +22,6 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> -#include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_helper.h> /* Frobs data inside this packet, which is linear. */ @@ -98,7 +95,6 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len, bool adjust) { - const struct nf_nat_l3proto *l3proto; struct tcphdr *tcph; int oldlen, datalen; @@ -118,9 +114,8 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, datalen = skb->len - protoff; - l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); - l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check, - datalen, oldlen); + nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, + tcph, &tcph->check, datalen, oldlen); if (adjust && rep_len != match_len) nf_ct_seqadj_set(ct, ctinfo, tcph->seq, @@ -150,7 +145,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - const struct nf_nat_l3proto *l3proto; struct udphdr *udph; int datalen, oldlen; @@ -176,9 +170,8 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return true; - l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); - l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check, - datalen, oldlen); + nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, + udph, &udph->check, datalen, oldlen); return true; } diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/netfilter/nf_nat_masquerade.c index 41327bb99093..86fa4dcc63c5 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -1,25 +1,17 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ +// SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/atomic.h> #include <linux/inetdevice.h> -#include <linux/ip.h> -#include <linux/timer.h> #include <linux/netfilter.h> -#include <net/protocol.h> -#include <net/ip.h> -#include <net/checksum.h> -#include <net/route.h> #include <linux/netfilter_ipv4.h> -#include <linux/netfilter/x_tables.h> -#include <net/netfilter/nf_nat.h> +#include <linux/netfilter_ipv6.h> + #include <net/netfilter/ipv4/nf_nat_masquerade.h> +#include <net/netfilter/ipv6/nf_nat_masquerade.h> + +static DEFINE_MUTEX(masq_mutex); +static unsigned int masq_refcnt __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -78,8 +70,6 @@ static int device_cmp(struct nf_conn *i, void *ifindex) if (!nat) return 0; - if (nf_ct_l3num(i) != NFPROTO_IPV4) - return 0; return nat->masq_index == (int)(long)ifindex; } @@ -95,7 +85,6 @@ static int masq_device_event(struct notifier_block *this, * conntracks which were associated with that device, * and forget them. */ - WARN_ON(dev->ifindex == 0); nf_ct_iterate_cleanup_net(net, device_cmp, (void *)(long)dev->ifindex, 0, 0); @@ -147,9 +136,6 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static int masq_refcnt; -static DEFINE_MUTEX(masq_mutex); - int nf_nat_masquerade_ipv4_register_notifier(void) { int ret = 0; @@ -194,3 +180,183 @@ out_unlock: mutex_unlock(&masq_mutex); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); + +#if IS_ENABLED(CONFIG_IPV6) +static atomic_t v6_worker_count __read_mostly; + +static int +nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#else + return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#endif +} + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, + const struct net_device *out) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + struct in6_addr src; + struct nf_conn *ct; + struct nf_nat_range2 newrange; + + ct = nf_ct_get(skb, &ctinfo); + WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY))); + + if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) + return NF_DROP; + + nat = nf_ct_nat_ext_add(ct); + if (nat) + nat->masq_index = out->ifindex; + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = src; + newrange.max_addr.in6 = src; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); + +struct masq_dev_work { + struct work_struct work; + struct net *net; + struct in6_addr addr; + int ifindex; +}; + +static int inet6_cmp(struct nf_conn *ct, void *work) +{ + struct masq_dev_work *w = (struct masq_dev_work *)work; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)w->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); +} + +static void iterate_cleanup_work(struct work_struct *work) +{ + struct masq_dev_work *w; + + w = container_of(work, struct masq_dev_work, work); + + nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0); + + put_net(w->net); + kfree(w); + atomic_dec(&v6_worker_count); + module_put(THIS_MODULE); +} + +/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). + * + * Defer it to the system workqueue. + * + * As we can have 'a lot' of inet_events (depending on amount of ipv6 + * addresses being deleted), we also need to limit work item queue. + */ +static int masq_inet6_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + const struct net_device *dev; + struct masq_dev_work *w; + struct net *net; + + if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16) + return NOTIFY_DONE; + + dev = ifa->idev->dev; + net = maybe_get_net(dev_net(dev)); + if (!net) + return NOTIFY_DONE; + + if (!try_module_get(THIS_MODULE)) + goto err_module; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (w) { + atomic_inc(&v6_worker_count); + + INIT_WORK(&w->work, iterate_cleanup_work); + w->ifindex = dev->ifindex; + w->net = net; + w->addr = ifa->addr; + schedule_work(&w->work); + + return NOTIFY_DONE; + } + + module_put(THIS_MODULE); + err_module: + put_net(net); + return NOTIFY_DONE; +} + +static struct notifier_block masq_inet6_notifier = { + .notifier_call = masq_inet6_event, +}; + +int nf_nat_masquerade_ipv6_register_notifier(void) +{ + int ret = 0; + + mutex_lock(&masq_mutex); + /* check if the notifier is already set */ + if (++masq_refcnt > 1) + goto out_unlock; + + ret = register_netdevice_notifier(&masq_dev_notifier); + if (ret) + goto err_dec; + + ret = register_inet6addr_notifier(&masq_inet6_notifier); + if (ret) + goto err_unregister; + + mutex_unlock(&masq_mutex); + return ret; + +err_unregister: + unregister_netdevice_notifier(&masq_dev_notifier); +err_dec: + masq_refcnt--; +out_unlock: + mutex_unlock(&masq_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); + +void nf_nat_masquerade_ipv6_unregister_notifier(void) +{ + mutex_lock(&masq_mutex); + /* check if the notifier still has clients */ + if (--masq_refcnt > 0) + goto out_unlock; + + unregister_inet6addr_notifier(&masq_inet6_notifier); + unregister_netdevice_notifier(&masq_dev_notifier); +out_unlock: + mutex_unlock(&masq_mutex); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); +#endif diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index f83bf9d8c9f5..62743da3004f 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -20,13 +20,26 @@ #include <linux/netfilter.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> + +#include <linux/ipv6.h> +#include <linux/netfilter_ipv6.h> +#include <net/checksum.h> +#include <net/ip6_checksum.h> +#include <net/ip6_route.h> +#include <net/xfrm.h> +#include <net/ipv6.h> + +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static void nf_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype); static void __udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, struct udphdr *hdr, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, bool do_csum) @@ -43,8 +56,7 @@ __udp_manip_pkt(struct sk_buff *skb, portptr = &hdr->dest; } if (do_csum) { - l3proto->csum_update(skb, iphdroff, &hdr->check, - tuple, maniptype); + nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); if (!hdr->check) @@ -54,7 +66,6 @@ __udp_manip_pkt(struct sk_buff *skb, } static bool udp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -68,12 +79,11 @@ static bool udp_manip_pkt(struct sk_buff *skb, hdr = (struct udphdr *)(skb->data + hdroff); do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; - __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum); + __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum); return true; } static bool udplite_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -85,14 +95,13 @@ static bool udplite_manip_pkt(struct sk_buff *skb, return false; hdr = (struct udphdr *)(skb->data + hdroff); - __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true); + __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true); #endif return true; } static bool sctp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -135,7 +144,6 @@ sctp_manip_pkt(struct sk_buff *skb, static bool tcp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -171,14 +179,13 @@ tcp_manip_pkt(struct sk_buff *skb, if (hdrsize < sizeof(*hdr)) return true; - l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); return true; } static bool dccp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -210,8 +217,7 @@ dccp_manip_pkt(struct sk_buff *skb, if (hdrsize < sizeof(*hdr)) return true; - l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, - tuple, maniptype); + nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, false); #endif @@ -220,7 +226,6 @@ dccp_manip_pkt(struct sk_buff *skb, static bool icmp_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -239,7 +244,6 @@ icmp_manip_pkt(struct sk_buff *skb, static bool icmpv6_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -250,8 +254,7 @@ icmpv6_manip_pkt(struct sk_buff *skb, return false; hdr = (struct icmp6hdr *)(skb->data + hdroff); - l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, - tuple, maniptype); + nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype); if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST || hdr->icmp6_type == ICMPV6_ECHO_REPLY) { inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, @@ -265,7 +268,6 @@ icmpv6_manip_pkt(struct sk_buff *skb, /* manipulate a GRE packet according to maniptype */ static bool gre_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) @@ -304,40 +306,718 @@ gre_manip_pkt(struct sk_buff *skb, return true; } -bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb, - const struct nf_nat_l3proto *l3proto, +static bool l4proto_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { switch (tuple->dst.protonum) { case IPPROTO_TCP: - return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff, + return tcp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_UDP: - return udp_manip_pkt(skb, l3proto, iphdroff, hdroff, + return udp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_UDPLITE: - return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff, + return udplite_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_SCTP: - return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff, + return sctp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_ICMP: - return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff, + return icmp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_ICMPV6: - return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff, + return icmpv6_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_DCCP: - return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff, + return dccp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); case IPPROTO_GRE: - return gre_manip_pkt(skb, l3proto, iphdroff, hdroff, + return gre_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); } /* If we don't know protocol -- no error, pass it unmodified. */ return true; } -EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt); + +static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + unsigned int hdroff; + + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + return false; + + iph = (void *)skb->data + iphdroff; + hdroff = iphdroff + iph->ihl * 4; + + if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype)) + return false; + iph = (void *)skb->data + iphdroff; + + if (maniptype == NF_NAT_MANIP_SRC) { + csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return true; +} + +static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ipv6h; + __be16 frag_off; + int hdroff; + u8 nexthdr; + + if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) + return false; + + ipv6h = (void *)skb->data + iphdroff; + nexthdr = ipv6h->nexthdr; + hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), + &nexthdr, &frag_off); + if (hdroff < 0) + goto manip_addr; + + if ((frag_off & htons(~0x7)) == 0 && + !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype)) + return false; + + /* must reload, offset might have changed */ + ipv6h = (void *)skb->data + iphdroff; + +manip_addr: + if (maniptype == NF_NAT_MANIP_SRC) + ipv6h->saddr = target->src.u3.in6; + else + ipv6h->daddr = target->dst.u3.in6; + +#endif + return true; +} + +unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, + enum nf_nat_manip_type mtype, + enum ip_conntrack_dir dir) +{ + struct nf_conntrack_tuple target; + + /* We are aiming to look like inverse of other direction. */ + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + + switch (target.src.l3num) { + case NFPROTO_IPV6: + if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype)) + return NF_ACCEPT; + break; + case NFPROTO_IPV4: + if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype)) + return NF_ACCEPT; + break; + default: + WARN_ON_ONCE(1); + break; + } + + return NF_DROP; +} + +static void nf_nat_ipv4_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + __be32 oldip, newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = iph->saddr; + newip = t->src.u3.ip; + } else { + oldip = iph->daddr; + newip = t->dst.u3.ip; + } + inet_proto_csum_replace4(check, skb, oldip, newip, true); +} + +static void nf_nat_ipv6_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); + const struct in6_addr *oldip, *newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = &ipv6h->saddr; + newip = &t->src.u3.in6; + } else { + oldip = &ipv6h->daddr; + newip = &t->dst.u3.in6; + } + inet_proto_csum_replace16(check, skb, oldip->s6_addr32, + newip->s6_addr32, true); +#endif +} + +static void nf_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + switch (t->src.l3num) { + case NFPROTO_IPV4: + nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype); + return; + case NFPROTO_IPV6: + nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype); + return; + } +} + +static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + const struct iphdr *iph = ip_hdr(skb); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + + ip_hdrlen(skb); + skb->csum_offset = (void *)check - data; + *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, + proto, 0); + } else { + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), true); + } +} + +#if IS_ENABLED(CONFIG_IPV6) +static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + + (data - (void *)skb->data); + skb->csum_offset = (void *)check - data; + *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, 0); + } else { + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), true); + } +} +#endif + +void nf_nat_csum_recalc(struct sk_buff *skb, + u8 nfproto, u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + switch (nfproto) { + case NFPROTO_IPV4: + nf_nat_ipv4_csum_recalc(skb, proto, data, check, + datalen, oldlen); + return; +#if IS_ENABLED(CONFIG_IPV6) + case NFPROTO_IPV6: + nf_nat_ipv6_csum_recalc(skb, proto, data, check, + datalen, oldlen); + return; +#endif + } + + WARN_ON_ONCE(1); +} + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + unsigned int hdrlen = ip_hdrlen(skb); + struct nf_conntrack_tuple target; + unsigned long statusbit; + + WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp.type == ICMP_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), + &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt may reallocate */ + inside = (void *)skb->data + hdrlen; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + /* Change outer to look like the reply to an incoming packet */ + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMP; + if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +static unsigned int +nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return NF_ACCEPT; + + if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + state->hook)) + return NF_DROP; + else + return NF_ACCEPT; + } + } + + return nf_nat_inet_fn(priv, skb, state); +} + +static unsigned int +nf_nat_ipv4_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + unsigned int ret; + __be32 daddr = ip_hdr(skb)->daddr; + + ret = nf_nat_ipv4_fn(priv, skb, state); + if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv4_out(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + int err; +#endif + unsigned int ret; + + ret = nf_nat_ipv4_fn(priv, skb, state); +#ifdef CONFIG_XFRM + if (ret != NF_ACCEPT) + return ret; + + if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) + return ret; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(state->net, skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + int err; + + ret = nf_nat_ipv4_fn(priv, skb, state); + if (ret != NF_ACCEPT) + return ret; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#ifdef CONFIG_XFRM + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(state->net, skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#endif + } + return ret; +} + +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_in, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_out, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_local_fn, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_fn, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, +}; + +int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops) +{ + return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn); + +void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn); + +#if IS_ENABLED(CONFIG_IPV6) +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen) +{ + struct { + struct icmp6hdr icmp6; + struct ipv6hdr ip6; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + struct nf_conntrack_tuple target; + unsigned long statusbit; + + WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), + &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + inside = (void *)skb->data + hdrlen; + inside->icmp6.icmp6_cksum = 0; + inside->icmp6.icmp6_cksum = + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + skb->len - hdrlen, IPPROTO_ICMPV6, + skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMPV6; + if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); + +static unsigned int +nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be16 frag_off; + int hdrlen; + u8 nexthdr; + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + state->hook, + hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + } + + return nf_nat_inet_fn(priv, skb, state); +} + +static unsigned int +nf_nat_ipv6_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + unsigned int ret; + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + + ret = nf_nat_ipv6_fn(priv, skb, state); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv6_out(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + int err; +#endif + unsigned int ret; + + ret = nf_nat_ipv6_fn(priv, skb, state); +#ifdef CONFIG_XFRM + if (ret != NF_ACCEPT) + return ret; + + if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) + return ret; + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } +#endif + + return ret; +} + +static int nat_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + +static unsigned int +nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + int err; + + ret = nf_nat_ipv6_fn(priv, skb, state); + if (ret != NF_ACCEPT) + return ret; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + err = nat_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#endif + } + + return ret; +} + +static const struct nf_hook_ops nf_nat_ipv6_ops[] = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_in, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_out, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_local_fn, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_fn, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_NAT_SRC, + }, +}; + +int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) +{ + return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, + ARRAY_SIZE(nf_nat_ipv6_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn); + +void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) +{ + nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); +} +EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn); +#endif /* CONFIG_IPV6 */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e1a88ba2249e..faf6bd10a19f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2172,6 +2172,7 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, { struct nft_expr_info info; struct nft_expr *expr; + struct module *owner; int err; err = nf_tables_expr_parse(ctx, nla, &info); @@ -2191,7 +2192,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err3: kfree(expr); err2: - module_put(info.ops->type->owner); + owner = info.ops->type->owner; + if (info.ops->type->release_ops) + info.ops->type->release_ops(info.ops); + + module_put(owner); err1: return ERR_PTR(err); } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2a00aef7b6d4..d0f168c2670f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -98,21 +98,23 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { struct nft_base_chain *base_chain; + struct nft_stats __percpu *pstats; struct nft_stats *stats; base_chain = nft_base_chain(chain); - if (!rcu_access_pointer(base_chain->stats)) - return; - local_bh_disable(); - stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); - if (stats) { + rcu_read_lock(); + pstats = READ_ONCE(base_chain->stats); + if (pstats) { + local_bh_disable(); + stats = this_cpu_ptr(pstats); u64_stats_update_begin(&stats->syncp); stats->pkts++; stats->bytes += pkt->skb->len; u64_stats_update_end(&stats->syncp); + local_bh_enable(); } - local_bh_enable(); + rcu_read_unlock(); } struct nft_jumpstack { @@ -221,7 +223,6 @@ next_rule: chain = regs.verdict.chain; goto do_chain; case NFT_CONTINUE: - /* fall through */ case NFT_RETURN: nft_trace_packet(&info, chain, rule, NFT_TRACETYPE_RETURN); diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c new file mode 100644 index 000000000000..ee4852088d50 --- /dev/null +++ b/net/netfilter/nft_chain_nat.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/module.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_tables_ipv6.h> + +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (state->pf) { +#ifdef CONFIG_NF_TABLES_IPV4 + case NFPROTO_IPV4: + nft_set_pktinfo_ipv4(&pkt, skb); + break; +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + case NFPROTO_IPV6: + nft_set_pktinfo_ipv6(&pkt, skb); + break; +#endif + default: + break; + } + + return nft_do_chain(&pkt, priv); +} + +#ifdef CONFIG_NF_TABLES_IPV4 +static const struct nft_chain_type nft_chain_nat_ipv4 = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .hooks = { + [NF_INET_PRE_ROUTING] = nft_nat_do_chain, + [NF_INET_POST_ROUTING] = nft_nat_do_chain, + [NF_INET_LOCAL_OUT] = nft_nat_do_chain, + [NF_INET_LOCAL_IN] = nft_nat_do_chain, + }, + .ops_register = nf_nat_ipv4_register_fn, + .ops_unregister = nf_nat_ipv4_unregister_fn, +}; +#endif + +#ifdef CONFIG_NF_TABLES_IPV6 +static const struct nft_chain_type nft_chain_nat_ipv6 = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .hooks = { + [NF_INET_PRE_ROUTING] = nft_nat_do_chain, + [NF_INET_POST_ROUTING] = nft_nat_do_chain, + [NF_INET_LOCAL_OUT] = nft_nat_do_chain, + [NF_INET_LOCAL_IN] = nft_nat_do_chain, + }, + .ops_register = nf_nat_ipv6_register_fn, + .ops_unregister = nf_nat_ipv6_unregister_fn, +}; +#endif + +static int __init nft_chain_nat_init(void) +{ +#ifdef CONFIG_NF_TABLES_IPV6 + nft_register_chain_type(&nft_chain_nat_ipv6); +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + nft_register_chain_type(&nft_chain_nat_ipv4); +#endif + + return 0; +} + +static void __exit nft_chain_nat_exit(void) +{ +#ifdef CONFIG_NF_TABLES_IPV4 + nft_unregister_chain_type(&nft_chain_nat_ipv4); +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_chain_type(&nft_chain_nat_ipv6); +#endif +} + +module_init(nft_chain_nat_init); +module_exit(nft_chain_nat_exit); + +MODULE_LICENSE("GPL"); +#ifdef CONFIG_NF_TABLES_IPV4 +MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); +#endif +#ifdef CONFIG_NF_TABLES_IPV6 +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); +#endif diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 0a4bad55a8aa..469f9da5073b 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -22,23 +22,6 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_arp/arp_tables.h> #include <net/netfilter/nf_tables.h> -#include <net/netns/generic.h> - -struct nft_xt { - struct list_head head; - struct nft_expr_ops ops; - refcount_t refcnt; - - /* used only when transaction mutex is locked */ - unsigned int listcnt; - - /* Unlike other expressions, ops doesn't have static storage duration. - * nft core assumes they do. We use kfree_rcu so that nft core can - * can check expr->ops->size even after nft_compat->destroy() frees - * the nft_xt struct that holds the ops structure. - */ - struct rcu_head rcu_head; -}; /* Used for matches where *info is larger than X byte */ #define NFT_MATCH_LARGE_THRESH 192 @@ -47,46 +30,6 @@ struct nft_xt_match_priv { void *info; }; -struct nft_compat_net { - struct list_head nft_target_list; - struct list_head nft_match_list; -}; - -static unsigned int nft_compat_net_id __read_mostly; -static struct nft_expr_type nft_match_type; -static struct nft_expr_type nft_target_type; - -static struct nft_compat_net *nft_compat_pernet(struct net *net) -{ - return net_generic(net, nft_compat_net_id); -} - -static void nft_xt_get(struct nft_xt *xt) -{ - /* refcount_inc() warns on 0 -> 1 transition, but we can't - * init the reference count to 1 in .select_ops -- we can't - * undo such an increase when another expression inside the same - * rule fails afterwards. - */ - if (xt->listcnt == 0) - refcount_set(&xt->refcnt, 1); - else - refcount_inc(&xt->refcnt); - - xt->listcnt++; -} - -static bool nft_xt_put(struct nft_xt *xt) -{ - if (refcount_dec_and_test(&xt->refcnt)) { - WARN_ON_ONCE(!list_empty(&xt->head)); - kfree_rcu(xt, rcu_head); - return true; - } - - return false; -} - static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx, const char *tablename) { @@ -281,7 +224,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); - struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -305,8 +247,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (!target->target) return -EINVAL; - nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt_get(nft_xt); return 0; } @@ -325,8 +265,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(me); + module_put(me); + kfree(expr->ops); } static int nft_extension_dump_info(struct sk_buff *skb, int attr, @@ -499,7 +439,6 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); - struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -515,13 +454,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - ret = xt_check_match(&par, size, proto, inv); - if (ret < 0) - return ret; - - nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt_get(nft_xt); - return 0; + return xt_check_match(&par, size, proto, inv); } static int @@ -564,8 +497,8 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, if (par.match->destroy != NULL) par.match->destroy(&par); - if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(me); + module_put(me); + kfree(expr->ops); } static void @@ -574,18 +507,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } -static void nft_compat_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - enum nft_trans_phase phase) -{ - struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); - - if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { - if (--xt->listcnt == 0) - list_del_init(&xt->head); - } -} - static void nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -780,19 +701,13 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { .cb = nfnl_nft_compat_cb, }; -static bool nft_match_cmp(const struct xt_match *match, - const char *name, u32 rev, u32 family) -{ - return strcmp(match->name, name) == 0 && match->revision == rev && - (match->family == NFPROTO_UNSPEC || match->family == family); -} +static struct nft_expr_type nft_match_type; static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { - struct nft_compat_net *cn; - struct nft_xt *nft_match; + struct nft_expr_ops *ops; struct xt_match *match; unsigned int matchsize; char *mt_name; @@ -808,16 +723,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); family = ctx->family; - cn = nft_compat_pernet(ctx->net); - - /* Re-use the existing match if it's already loaded. */ - list_for_each_entry(nft_match, &cn->nft_match_list, head) { - struct xt_match *match = nft_match->ops.data; - - if (nft_match_cmp(match, mt_name, rev, family)) - return &nft_match->ops; - } - match = xt_request_find_match(family, mt_name, rev); if (IS_ERR(match)) return ERR_PTR(-ENOENT); @@ -827,65 +732,62 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - /* This is the first time we use this match, allocate operations */ - nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); - if (nft_match == NULL) { + ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL); + if (!ops) { err = -ENOMEM; goto err; } - refcount_set(&nft_match->refcnt, 0); - nft_match->ops.type = &nft_match_type; - nft_match->ops.eval = nft_match_eval; - nft_match->ops.init = nft_match_init; - nft_match->ops.destroy = nft_match_destroy; - nft_match->ops.deactivate = nft_compat_deactivate; - nft_match->ops.dump = nft_match_dump; - nft_match->ops.validate = nft_match_validate; - nft_match->ops.data = match; + ops->type = &nft_match_type; + ops->eval = nft_match_eval; + ops->init = nft_match_init; + ops->destroy = nft_match_destroy; + ops->dump = nft_match_dump; + ops->validate = nft_match_validate; + ops->data = match; matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); if (matchsize > NFT_MATCH_LARGE_THRESH) { matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv)); - nft_match->ops.eval = nft_match_large_eval; - nft_match->ops.init = nft_match_large_init; - nft_match->ops.destroy = nft_match_large_destroy; - nft_match->ops.dump = nft_match_large_dump; + ops->eval = nft_match_large_eval; + ops->init = nft_match_large_init; + ops->destroy = nft_match_large_destroy; + ops->dump = nft_match_large_dump; } - nft_match->ops.size = matchsize; + ops->size = matchsize; - nft_match->listcnt = 0; - list_add(&nft_match->head, &cn->nft_match_list); - - return &nft_match->ops; + return ops; err: module_put(match->me); return ERR_PTR(err); } +static void nft_match_release_ops(const struct nft_expr_ops *ops) +{ + struct xt_match *match = ops->data; + + module_put(match->me); + kfree(ops); +} + static struct nft_expr_type nft_match_type __read_mostly = { .name = "match", .select_ops = nft_match_select_ops, + .release_ops = nft_match_release_ops, .policy = nft_match_policy, .maxattr = NFTA_MATCH_MAX, .owner = THIS_MODULE, }; -static bool nft_target_cmp(const struct xt_target *tg, - const char *name, u32 rev, u32 family) -{ - return strcmp(tg->name, name) == 0 && tg->revision == rev && - (tg->family == NFPROTO_UNSPEC || tg->family == family); -} +static struct nft_expr_type nft_target_type; static const struct nft_expr_ops * nft_target_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { - struct nft_compat_net *cn; - struct nft_xt *nft_target; + struct nft_expr_ops *ops; struct xt_target *target; char *tg_name; u32 rev, family; @@ -905,18 +807,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, strcmp(tg_name, "standard") == 0) return ERR_PTR(-EINVAL); - cn = nft_compat_pernet(ctx->net); - /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &cn->nft_target_list, head) { - struct xt_target *target = nft_target->ops.data; - - if (!target->target) - continue; - - if (nft_target_cmp(target, tg_name, rev, family)) - return &nft_target->ops; - } - target = xt_request_find_target(family, tg_name, rev); if (IS_ERR(target)) return ERR_PTR(-ENOENT); @@ -931,113 +821,55 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - /* This is the first time we use this target, allocate operations */ - nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); - if (nft_target == NULL) { + ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL); + if (!ops) { err = -ENOMEM; goto err; } - refcount_set(&nft_target->refcnt, 0); - nft_target->ops.type = &nft_target_type; - nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); - nft_target->ops.init = nft_target_init; - nft_target->ops.destroy = nft_target_destroy; - nft_target->ops.deactivate = nft_compat_deactivate; - nft_target->ops.dump = nft_target_dump; - nft_target->ops.validate = nft_target_validate; - nft_target->ops.data = target; + ops->type = &nft_target_type; + ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); + ops->init = nft_target_init; + ops->destroy = nft_target_destroy; + ops->dump = nft_target_dump; + ops->validate = nft_target_validate; + ops->data = target; if (family == NFPROTO_BRIDGE) - nft_target->ops.eval = nft_target_eval_bridge; + ops->eval = nft_target_eval_bridge; else - nft_target->ops.eval = nft_target_eval_xt; - - nft_target->listcnt = 0; - list_add(&nft_target->head, &cn->nft_target_list); + ops->eval = nft_target_eval_xt; - return &nft_target->ops; + return ops; err: module_put(target->me); return ERR_PTR(err); } +static void nft_target_release_ops(const struct nft_expr_ops *ops) +{ + struct xt_target *target = ops->data; + + module_put(target->me); + kfree(ops); +} + static struct nft_expr_type nft_target_type __read_mostly = { .name = "target", .select_ops = nft_target_select_ops, + .release_ops = nft_target_release_ops, .policy = nft_target_policy, .maxattr = NFTA_TARGET_MAX, .owner = THIS_MODULE, }; -static int __net_init nft_compat_init_net(struct net *net) -{ - struct nft_compat_net *cn = nft_compat_pernet(net); - - INIT_LIST_HEAD(&cn->nft_target_list); - INIT_LIST_HEAD(&cn->nft_match_list); - - return 0; -} - -static void __net_exit nft_compat_exit_net(struct net *net) -{ - struct nft_compat_net *cn = nft_compat_pernet(net); - struct nft_xt *xt, *next; - - if (list_empty(&cn->nft_match_list) && - list_empty(&cn->nft_target_list)) - return; - - /* If there was an error that caused nft_xt expr to not be initialized - * fully and noone else requested the same expression later, the lists - * contain 0-refcount entries that still hold module reference. - * - * Clean them here. - */ - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) { - struct xt_target *target = xt->ops.data; - - list_del_init(&xt->head); - - if (refcount_read(&xt->refcnt)) - continue; - module_put(target->me); - kfree(xt); - } - - list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) { - struct xt_match *match = xt->ops.data; - - list_del_init(&xt->head); - - if (refcount_read(&xt->refcnt)) - continue; - module_put(match->me); - kfree(xt); - } - mutex_unlock(&net->nft.commit_mutex); -} - -static struct pernet_operations nft_compat_net_ops = { - .init = nft_compat_init_net, - .exit = nft_compat_exit_net, - .id = &nft_compat_net_id, - .size = sizeof(struct nft_compat_net), -}; - static int __init nft_compat_module_init(void) { int ret; - ret = register_pernet_subsys(&nft_compat_net_ops); - if (ret < 0) - goto err_target; - ret = nft_register_expr(&nft_match_type); if (ret < 0) - goto err_pernet; + return ret; ret = nft_register_expr(&nft_target_type); if (ret < 0) @@ -1054,8 +886,6 @@ err_target: nft_unregister_expr(&nft_target_type); err_match: nft_unregister_expr(&nft_match_type); -err_pernet: - unregister_pernet_subsys(&nft_compat_net_ops); return ret; } @@ -1064,7 +894,6 @@ static void __exit nft_compat_module_exit(void) nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); - unregister_pernet_subsys(&nft_compat_net_ops); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9d8655bc1bea..bee156eaa400 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -14,18 +14,24 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_masq.h> +#include <net/netfilter/ipv4/nf_nat_masquerade.h> +#include <net/netfilter/ipv6/nf_nat_masquerade.h> -const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { +struct nft_masq { + u32 flags; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; +}; + +static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 }, [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 }, }; -EXPORT_SYMBOL_GPL(nft_masq_policy); -int nft_masq_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) +static int nft_masq_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { int err; @@ -36,11 +42,10 @@ int nft_masq_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_POST_ROUTING)); } -EXPORT_SYMBOL_GPL(nft_masq_validate); -int nft_masq_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_masq_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); struct nft_masq *priv = nft_expr_priv(expr); @@ -75,9 +80,8 @@ int nft_masq_init(const struct nft_ctx *ctx, return nf_ct_netns_get(ctx->net, ctx->family); } -EXPORT_SYMBOL_GPL(nft_masq_init); -int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_masq *priv = nft_expr_priv(expr); @@ -98,7 +102,157 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) nla_put_failure: return -1; } -EXPORT_SYMBOL_GPL(nft_masq_dump); + +static void nft_masq_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_masq *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; + + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); + } + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), + &range, nft_out(pkt)); +} + +static void +nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV4); +} + +static struct nft_expr_type nft_masq_ipv4_type; +static const struct nft_expr_ops nft_masq_ipv4_ops = { + .type = &nft_masq_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), + .eval = nft_masq_ipv4_eval, + .init = nft_masq_init, + .destroy = nft_masq_ipv4_destroy, + .dump = nft_masq_dump, + .validate = nft_masq_validate, +}; + +static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "masq", + .ops = &nft_masq_ipv4_ops, + .policy = nft_masq_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +#ifdef CONFIG_NF_TABLES_IPV6 +static void nft_masq_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_masq *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; + + memset(&range, 0, sizeof(range)); + range.flags = priv->flags; + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); + } + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); +} + +static void +nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV6); +} + +static struct nft_expr_type nft_masq_ipv6_type; +static const struct nft_expr_ops nft_masq_ipv6_ops = { + .type = &nft_masq_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), + .eval = nft_masq_ipv6_eval, + .init = nft_masq_init, + .destroy = nft_masq_ipv6_destroy, + .dump = nft_masq_dump, + .validate = nft_masq_validate, +}; + +static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "masq", + .ops = &nft_masq_ipv6_ops, + .policy = nft_masq_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_masq_module_init_ipv6(void) +{ + int ret = nft_register_expr(&nft_masq_ipv6_type); + + if (ret) + return ret; + + ret = nf_nat_masquerade_ipv6_register_notifier(); + if (ret < 0) + nft_unregister_expr(&nft_masq_ipv6_type); + + return ret; +} + +static void nft_masq_module_exit_ipv6(void) +{ + nft_unregister_expr(&nft_masq_ipv6_type); + nf_nat_masquerade_ipv6_unregister_notifier(); +} +#else +static inline int nft_masq_module_init_ipv6(void) { return 0; } +static inline void nft_masq_module_exit_ipv6(void) {} +#endif + +static int __init nft_masq_module_init(void) +{ + int ret; + + ret = nft_masq_module_init_ipv6(); + if (ret < 0) + return ret; + + ret = nft_register_expr(&nft_masq_ipv4_type); + if (ret < 0) { + nft_masq_module_exit_ipv6(); + return ret; + } + + ret = nf_nat_masquerade_ipv4_register_notifier(); + if (ret < 0) { + nft_masq_module_exit_ipv6(); + nft_unregister_expr(&nft_masq_ipv4_type); + return ret; + } + + return ret; +} + +static void __exit nft_masq_module_exit(void) +{ + nft_masq_module_exit_ipv6(); + nft_unregister_expr(&nft_masq_ipv4_type); + nf_nat_masquerade_ipv4_unregister_notifier(); +} + +module_init(nft_masq_module_init); +module_exit(nft_masq_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index c15807d10b91..e93aed9bda88 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -21,9 +21,7 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat_l3proto.h> #include <net/ip.h> struct nft_nat { diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index c64cbe78dee7..f8092926f704 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -13,19 +13,24 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_redirect.h> #include <net/netfilter/nf_tables.h> -#include <net/netfilter/nft_redir.h> -const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { +struct nft_redir { + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + u16 flags; +}; + +static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 }, [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 }, [NFTA_REDIR_FLAGS] = { .type = NLA_U32 }, }; -EXPORT_SYMBOL_GPL(nft_redir_policy); -int nft_redir_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) +static int nft_redir_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { int err; @@ -37,11 +42,10 @@ int nft_redir_validate(const struct nft_ctx *ctx, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)); } -EXPORT_SYMBOL_GPL(nft_redir_validate); -int nft_redir_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_redir *priv = nft_expr_priv(expr); unsigned int plen; @@ -77,7 +81,6 @@ int nft_redir_init(const struct nft_ctx *ctx, return nf_ct_netns_get(ctx->net, ctx->family); } -EXPORT_SYMBOL_GPL(nft_redir_init); int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -101,7 +104,134 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) nla_put_failure: return -1; } -EXPORT_SYMBOL_GPL(nft_redir_dump); + +static void nft_redir_ipv4_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_ipv4_multi_range_compat mr; + + memset(&mr, 0, sizeof(mr)); + if (priv->sreg_proto_min) { + mr.range[0].min.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + mr.range[0].max.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); + mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + mr.range[0].flags |= priv->flags; + + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); +} + +static void +nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV4); +} + +static struct nft_expr_type nft_redir_ipv4_type; +static const struct nft_expr_ops nft_redir_ipv4_ops = { + .type = &nft_redir_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv4_eval, + .init = nft_redir_init, + .destroy = nft_redir_ipv4_destroy, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "redir", + .ops = &nft_redir_ipv4_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +#ifdef CONFIG_NF_TABLES_IPV6 +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range2 range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + regs->verdict.code = + nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); +} + +static void +nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV6); +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .destroy = nft_redir_ipv6_destroy, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; +#endif + +static int __init nft_redir_module_init(void) +{ + int ret = nft_register_expr(&nft_redir_ipv4_type); + + if (ret) + return ret; + +#ifdef CONFIG_NF_TABLES_IPV6 + ret = nft_register_expr(&nft_redir_ipv6_type); + if (ret) { + nft_unregister_expr(&nft_redir_ipv4_type); + return ret; + } +#endif + + return ret; +} + +static void __exit nft_redir_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv4_type); +#ifdef CONFIG_NF_TABLES_IPV6 + nft_unregister_expr(&nft_redir_ipv6_type); +#endif +} + +module_init(nft_redir_module_init); +module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 339a9dd1c832..03df08801e28 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -442,15 +442,6 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set, return ERR_PTR(-ENOENT); } -/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */ -static inline u32 nft_hash_key(const u32 *key, u32 klen) -{ - if (klen == 4) - return *key; - - return *(u16 *)key; -} - static bool nft_hash_lookup_fast(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) @@ -460,11 +451,11 @@ static bool nft_hash_lookup_fast(const struct net *net, const struct nft_hash_elem *he; u32 hash, k1, k2; - k1 = nft_hash_key(key, set->klen); + k1 = *key; hash = jhash_1word(k1, priv->seed); hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry_rcu(he, &priv->table[hash], node) { - k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen); + k2 = *(u32 *)nft_set_ext_key(&he->ext)->data; if (k1 == k2 && nft_set_elem_active(&he->ext, genmask)) { *ext = &he->ext; @@ -474,6 +465,23 @@ static bool nft_hash_lookup_fast(const struct net *net, return false; } +static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, + const struct nft_set_ext *ext) +{ + const struct nft_data *key = nft_set_ext_key(ext); + u32 hash, k1; + + if (set->klen == 4) { + k1 = *(u32 *)key; + hash = jhash_1word(k1, priv->seed); + } else { + hash = jhash(key, set->klen, priv->seed); + } + hash = reciprocal_scale(hash, priv->buckets); + + return hash; +} + static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) @@ -483,8 +491,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); u32 hash; - hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); - hash = reciprocal_scale(hash, priv->buckets); + hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), nft_set_ext_key(&he->ext), set->klen) && @@ -523,10 +530,9 @@ static void *nft_hash_deactivate(const struct net *net, u8 genmask = nft_genmask_next(net); u32 hash; - hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); - hash = reciprocal_scale(hash, priv->buckets); + hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { - if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, + if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val, set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index ea28588c5eed..b113fcac94e1 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -406,6 +406,13 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx, return -ENOMEM; memcpy(&md->u.tun_info, &info, sizeof(info)); +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL); + if (err < 0) { + metadata_dst_free(md); + return err; + } +#endif ip_tunnel_info_opts_set(&md->u.tun_info, &priv->opts.u, priv->opts.len, priv->opts.flags); priv->md = md; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 13e1ac333fa4..e5e5c64df8d1 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -461,7 +461,7 @@ int xt_check_proc_name(const char *name, unsigned int size) EXPORT_SYMBOL(xt_check_proc_name); int xt_check_match(struct xt_mtchk_param *par, - unsigned int size, u_int8_t proto, bool inv_proto) + unsigned int size, u16 proto, bool inv_proto) { int ret; @@ -984,7 +984,7 @@ bool xt_find_jump_offset(const unsigned int *offsets, EXPORT_SYMBOL(xt_find_jump_offset); int xt_check_target(struct xt_tgchk_param *par, - unsigned int size, u_int8_t proto, bool inv_proto) + unsigned int size, u16 proto, bool inv_proto) { int ret; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index eb4cbd244c3d..5f9b37e12801 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -41,19 +41,13 @@ #include <linux/workqueue.h> #include <linux/sysfs.h> -struct idletimer_tg_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); -}; - struct idletimer_tg { struct list_head entry; struct timer_list timer; struct work_struct work; struct kobject *kobj; - struct idletimer_tg_attr attr; + struct device_attribute attr; unsigned int refcnt; }; @@ -76,15 +70,15 @@ struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) return NULL; } -static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, - char *buf) +static ssize_t idletimer_tg_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct idletimer_tg *timer; unsigned long expires = 0; mutex_lock(&list_mutex); - timer = __idletimer_tg_find_by_label(attr->name); + timer = __idletimer_tg_find_by_label(attr->attr.name); if (timer) expires = timer->timer.expires; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index ac91170fc8c8..61eabd171186 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -14,7 +14,7 @@ #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> -#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat.h> static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) { diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 89da9512ec1e..ac1cc6e38170 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -8,8 +8,6 @@ config OPENVSWITCH depends on !NF_CONNTRACK || \ (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ (!NF_NAT || NF_NAT) && \ - (!NF_NAT_IPV4 || NF_NAT_IPV4) && \ - (!NF_NAT_IPV6 || NF_NAT_IPV6) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) select LIBCRC32C select MPLS diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 35884f836260..1b6896896fff 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -29,9 +29,7 @@ #include <net/ipv6_frag.h> #ifdef CONFIG_NF_NAT_NEEDED -#include <linux/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat.h> #endif #include "datapath.h" @@ -745,14 +743,14 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + if (IS_ENABLED(CONFIG_NF_NAT) && skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto push; - } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; @@ -1673,7 +1671,7 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, } if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { - if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + if (IS_ENABLED(CONFIG_NF_NAT) && info->family == NFPROTO_IPV4) { if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, info->range.min_addr.ip) || @@ -1682,7 +1680,7 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, info->range.max_addr.ip)))) return false; - } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + } else if (IS_ENABLED(CONFIG_IPV6) && info->family == NFPROTO_IPV6) { if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, &info->range.min_addr.in6) || diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5821bdd98d20..e9c860d00416 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -17,8 +17,7 @@ CONFIG_VLAN_8021Q=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m -CONFIG_NF_NAT_IPV6=m -CONFIG_NF_NAT_IPV4=m +CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m |