From 8975ae88e137ea02a71b7a86af2f8eb790c2f1e7 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 14 Sep 2014 21:48:28 +0300 Subject: mac80211: fix warning on htmldocs for last_tdls_pkt_time Forgot to add an entry to the struct description of sta_info. Signed-off-by: Liad Kaufman Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 42f68cb8957e..bcda2ac7d844 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -336,6 +336,7 @@ struct ieee80211_tx_latency_stat { * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. * @cipher_scheme: optional cipher scheme for this station + * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed */ struct sta_info { /* General information, mostly static */ -- cgit v1.2.3-58-ga151 From c12bc4885f4b3bab0ed779c69d5d7e3223fa5003 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 30 Sep 2014 07:08:02 +0300 Subject: mac80211: return the vif's chandef in ieee80211_cfg_get_channel() The chandef of the channel context a vif is using may be different than the chandef of the vif itself. For instance, the bandwidth used by the vif may be narrower than the one configured in the channel context. To avoid confusion, return the vif's chandef in ieee80211_cfg_get_channel() instead of the chandef of the channel context. Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb6a1502b6df..343da1e35025 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3458,7 +3458,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - *chandef = chanctx_conf->def; + *chandef = sdata->vif.bss_conf.chandef; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && -- cgit v1.2.3-58-ga151 From bc37b16870a382e8b71d881444c19a16de1c1a7f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 7 Oct 2014 22:20:23 +0200 Subject: net: rfkill: kernel-doc warning fixes Correct the kernel-doc, the parameter is called "blocked" not "state". Signed-off-by: Fabian Frederick Signed-off-by: Johannes Berg --- net/rfkill/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index b3b16c070a7f..fa7cd792791c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); /** * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * This function sets the state of all switches of given type, * unless a specific switch is claimed by userspace (in which case, @@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) /** * rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). * Please refer to __rfkill_switch_all() for details. -- cgit v1.2.3-58-ga151 From 252e07ca5f64dd31fdfca8027287e7d75fefdab1 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 8 Oct 2014 09:48:34 +0300 Subject: nl80211: sanity check the channel switch counter value The nl80211 channel switch count attribute (NL80211_ATTR_CH_SWITCH_COUNT) is specified as u32, but the specification uses u8 for the counter. To make sure strange things don't happen without informing the user, sanity check the value and return -EINVAL if it doesn't fit in u8. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cb9f5a44ffad..5839c85075f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5927,6 +5927,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) int err; bool need_new_beacon = false; int len, i; + u32 cs_count; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5963,7 +5964,14 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (need_new_beacon && !info->attrs[NL80211_ATTR_CSA_IES]) return -EINVAL; - params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + /* Even though the attribute is u32, the specification says + * u8, so let's make sure we don't overflow. + */ + cs_count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + if (cs_count > 255) + return -EINVAL; + + params.count = cs_count; if (!need_new_beacon) goto skip_beacons; -- cgit v1.2.3-58-ga151 From ab2d7251d666995740da17b2a51ca545ac5dd037 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Oct 2014 11:25:20 +0200 Subject: netfilter: missing module license in the nf_reject_ipvX modules [ 23.545204] nf_reject_ipv4: module license 'unspecified' taints kernel. Fixes: c8d7b98 ("netfilter: move nf_send_resetX() code to nf_reject_ipvX modules") Reported-by: Dave Young Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 3 +++ net/ipv6/netfilter/nf_reject_ipv6.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index b023b4eb1a96..92b303dbd5fc 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -125,3 +126,5 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) kfree_skb(nskb); } EXPORT_SYMBOL_GPL(nf_send_reset); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 5f5f0438d74d..20d9defc6c59 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -5,6 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#include #include #include #include @@ -161,3 +163,5 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) ip6_local_out(nskb); } EXPORT_SYMBOL_GPL(nf_send_reset6); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3-58-ga151 From 7210e4e38f945dfa173c4a4e59ad827c9ecad541 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Oct 2014 19:50:22 +0200 Subject: netfilter: nf_tables: restrict nat/masq expressions to nat chain type This adds the missing validation code to avoid the use of nat/masq from non-nat chains. The validation assumes two possible configuration scenarios: 1) Use of nat from base chain that is not of nat type. Reject this configuration from the nft_*_init() path of the expression. 2) Use of nat from non-base chain. In this case, we have to wait until the non-base chain is referenced by at least one base chain via jump/goto. This is resolved from the nft_*_validate() path which is called from nf_tables_check_loops(). The user gets an -EOPNOTSUPP in both cases. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ include/net/netfilter/nft_masq.h | 3 +++ net/ipv4/netfilter/nft_masq_ipv4.c | 1 + net/ipv6/netfilter/nft_masq_ipv6.c | 1 + net/netfilter/nf_tables_api.c | 14 ++++++++++++++ net/netfilter/nft_masq.c | 12 ++++++++++++ net/netfilter/nft_nat.c | 12 ++++++++++++ 7 files changed, 46 insertions(+) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d7292392fac..845c596bf594 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -530,6 +530,9 @@ enum nft_chain_type { NFT_CHAIN_T_MAX }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type); + struct nft_stats { u64 bytes; u64 pkts; diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h index c72729f954f4..e2a518b60e19 100644 --- a/include/net/netfilter/nft_masq.h +++ b/include/net/netfilter/nft_masq.h @@ -13,4 +13,7 @@ int nft_masq_init(const struct nft_ctx *ctx, int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + #endif /* _NFT_MASQ_H_ */ diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 1c636d6b5b50..c1023c445920 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = { .eval = nft_masq_ipv4_eval, .init = nft_masq_init, .dump = nft_masq_dump, + .validate = nft_masq_validate, }; static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 556262f40761..8a7ac685076d 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .eval = nft_masq_ipv6_eval, .init = nft_masq_init, .dump = nft_masq_dump, + .validate = nft_masq_validate, }; static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 556a0dfa4abc..65eb2a1160d5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3744,6 +3744,20 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .abort = nf_tables_abort, }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type) +{ + const struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EOPNOTSUPP; + } + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 6637bab00567..d1ffd5eb3a9b 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -26,6 +26,11 @@ int nft_masq_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_masq *priv = nft_expr_priv(expr); + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; if (tb[NFTA_MASQ_FLAGS] == NULL) return 0; @@ -55,5 +60,12 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_masq_dump); +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 799550b476fb..0f0af6e86fb8 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -95,6 +95,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, u32 family; int err; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + if (tb[NFTA_NAT_TYPE] == NULL) return -EINVAL; @@ -205,6 +209,13 @@ nla_put_failure: return -1; } +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} + static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, @@ -212,6 +223,7 @@ static const struct nft_expr_ops nft_nat_ops = { .eval = nft_nat_eval, .init = nft_nat_init, .dump = nft_nat_dump, + .validate = nft_nat_validate, }; static struct nft_expr_type nft_nat_type __read_mostly = { -- cgit v1.2.3-58-ga151 From c7abf25af0f41be4b50d44c5b185d52eea360cb8 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 13 Oct 2014 14:34:41 +0200 Subject: mac80211: fix typo in starting baserate for rts_cts_rate_idx It affects non-(V)HT rates and can lead to selecting an rts_cts rate that is not a basic rate or way superior to the reference rate (ATM rates[0] used for the 1st attempt of the protected frame data). E.g, assuming drivers register growing (bitrate) sorted tables of ieee80211_rate-s, having : - rates[0].idx == d'2 and basic_rates == b'10100 will select rts_cts idx b'10011 & ~d'(BIT(2)-1), i.e. 1, likewise - rates[0].idx == d'2 and basic_rates == b'10001 will select rts_cts idx b'10000 The first is not a basic rate and the second is > rates[0]. Also, wrt severity of the addressed misbehavior, ATM we only have one rts_cts_rate_idx rather than one per rate table entry, so this idx might still point to bitrates > rates[1..MAX_RATES]. Fixes: 5253ffb8c9e1 ("mac80211: always pick a basic rate to tx RTS/CTS for pre-HT rates") Cc: stable@vger.kernel.org Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- net/mac80211/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8fdadfd94ba8..6081329784dd 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -448,7 +448,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, */ if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { u32 basic_rates = vif->bss_conf.basic_rates; - s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; + s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; rate = &sband->bitrates[rates[0].idx]; -- cgit v1.2.3-58-ga151 From 493618a92c6afdd3f6224ab586f169d6a259bb06 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Oct 2014 12:43:50 +0200 Subject: netfilter: nft_compat: fix hook validation for non-base chains Set hook_mask to zero for non-base chains, otherwise people may hit bogus errors from the xt_check_target() and xt_check_match() when validating the uninitialized hook_mask. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7e2683c8a44a..44ae273b4391 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -95,6 +95,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } @@ -293,6 +295,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } -- cgit v1.2.3-58-ga151 From f3f5ddeddd6aeadcef523d55ea9288e3d5c1cbc3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Oct 2014 10:13:48 +0200 Subject: netfilter: nft_compat: validate chain type in match/target We have to validate the real chain type to ensure that matches/targets are not used out from their scope (eg. MASQUERADE in nat chain type). The existing validation relies on the table name, but this is not sufficient since userspace can fool us by using the appropriate table name with a different chain type. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 75 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 44ae273b4391..0480f57a4eb6 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,9 +19,52 @@ #include #include #include -#include /* for set_fs */ #include +static const struct { + const char *name; + u8 type; +} table_to_chaintype[] = { + { "filter", NFT_CHAIN_T_DEFAULT }, + { "raw", NFT_CHAIN_T_DEFAULT }, + { "security", NFT_CHAIN_T_DEFAULT }, + { "mangle", NFT_CHAIN_T_ROUTE }, + { "nat", NFT_CHAIN_T_NAT }, + { }, +}; + +static int nft_compat_table_to_chaintype(const char *table) +{ + int i; + + for (i = 0; table_to_chaintype[i].name != NULL; i++) { + if (strcmp(table_to_chaintype[i].name, table) == 0) + return table_to_chaintype[i].type; + } + + return -1; +} + +static int nft_compat_chain_validate_dependency(const char *tablename, + const struct nft_chain *chain) +{ + enum nft_chain_type type; + const struct nft_base_chain *basechain; + + if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) + return 0; + + type = nft_compat_table_to_chaintype(tablename); + if (type < 0) + return -EINVAL; + + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EINVAL; + + return 0; +} + union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; @@ -153,6 +196,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(target->table, ctx->chain); + if (ret < 0) + goto err; + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -218,6 +265,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, { struct xt_target *target = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -225,11 +273,13 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & target->hooks) - return 0; + if (!(hook_mask & target->hooks)) + return -EINVAL; - /* This target is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(target->table, + ctx->chain); + if (ret < 0) + return ret; } return 0; } @@ -324,6 +374,10 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); + if (ret < 0) + goto err; + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -383,6 +437,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, { struct xt_match *match = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -390,11 +445,13 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & match->hooks) - return 0; + if (!(hook_mask & match->hooks)) + return -EINVAL; - /* This match is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(match->name, + ctx->chain); + if (ret < 0) + return ret; } return 0; } -- cgit v1.2.3-58-ga151 From 5c819a39753d6a3ae9c0092236f59730a369b619 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Oct 2014 00:16:57 +0200 Subject: netfilter: nft_nat: insufficient attribute validation We have to validate that we at least get an NFTA_NAT_REG_ADDR_MIN or NFTA_NFT_REG_PROTO_MIN attribute. Reject the configuration if none of them are present. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 0f0af6e86fb8..5078f1f1c569 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -99,7 +99,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - if (tb[NFTA_NAT_TYPE] == NULL) + if (tb[NFTA_NAT_TYPE] == NULL || + (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && + tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) return -EINVAL; switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { -- cgit v1.2.3-58-ga151 From 61cfac6b42af98ab46bcb3a47e150e7b20d5015e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Oct 2014 00:19:35 +0200 Subject: netfilter: nft_nat: NFTA_NAT_REG_ADDR_MAX depends on NFTA_NAT_REG_ADDR_MIN Interpret NFTA_NAT_REG_ADDR_MAX if NFTA_NAT_REG_ADDR_MIN is present, otherwise, skip it. Same thing with NFTA_NAT_REG_PROTO_MAX. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 50 +++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 5078f1f1c569..a95e0c1addd3 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -126,38 +126,44 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MIN])); + priv->sreg_addr_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX])); + + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else { + priv->sreg_addr_max = priv->sreg_addr_min; + } + } if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MIN])); + priv->sreg_proto_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX])); + + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } if (tb[NFTA_NAT_FLAGS]) { priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); -- cgit v1.2.3-58-ga151 From 1e2d56a5d33a7e1fcd21ed3859f52596d02708b0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Oct 2014 00:24:14 +0200 Subject: netfilter: nft_nat: dump attributes if they are set Dump NFTA_NAT_REG_ADDR_MIN if this is non-zero. Same thing with NFTA_NAT_REG_PROTO_MIN. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index a95e0c1addd3..afe2b0b45ec4 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -191,17 +191,19 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; + + if (priv->sreg_addr_min) { + if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN, + htonl(priv->sreg_addr_min)) || + nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, + htonl(priv->sreg_addr_max))) + goto nla_put_failure; + } + if (priv->sreg_proto_min) { if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, - htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, + htonl(priv->sreg_proto_min)) || + nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) goto nla_put_failure; } -- cgit v1.2.3-58-ga151 From 95ff88688781db2f64042e69bd499e518bbb36e5 Mon Sep 17 00:00:00 2001 From: Ian Morgan Date: Sun, 19 Oct 2014 08:05:13 -0400 Subject: ax88179_178a: fix bonding failure The following patch fixes a bug which causes the ax88179_178a driver to be incapable of being added to a bond. When I brought up the issue with the bonding maintainers, they indicated that the real problem was with the NIC driver which must return zero for success (of setting the MAC address). I see that several other NIC drivers follow that pattern by either simply always returing zero, or by passing through a negative (error) result while rewriting any positive return code to zero. With that same philisophy applied to the ax88179_178a driver, it allows it to work correctly with the bonding driver. I believe this is suitable for queuing in -stable, as it's a small, simple, and obvious fix that corrects a defect with no other known workaround. This patch is against vanilla 3.17(.0). Signed-off-by: Ian Morgan drivers/net/usb/ax88179_178a.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index be4275721039..e6338c16081a 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -937,6 +937,7 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p) { struct usbnet *dev = netdev_priv(net); struct sockaddr *addr = p; + int ret; if (netif_running(net)) return -EBUSY; @@ -946,8 +947,12 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p) memcpy(net->dev_addr, addr->sa_data, ETH_ALEN); /* Set the MAC address */ - return ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, + ret = ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, net->dev_addr); + if (ret < 0) + return ret; + + return 0; } static const struct net_device_ops ax88179_netdev_ops = { -- cgit v1.2.3-58-ga151 From 11b2357d5dbce999803e9055f8c09829a8a87db4 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 20 Oct 2014 10:54:36 +0200 Subject: mac80211: minstrels: fix buffer overflow in HT debugfs rc_stats ATM an HT rc_stats line is 106 chars. Times 8(MCS_GROUP_RATES)*3(SS)*2(GI)*2(BW) + CCK(4), i.e. x100, this is well above the current 8192 - sizeof(*ms) currently allocated. Fix this by squeezing the output as follows (not that we're short on memory but this also improves readability and range, the new format adds one more digit to *ok/*cum and ok/cum): - Before (HT) (106 ch): type rate throughput ewma prob this prob retry this succ/attempt success attempts CCK/LP 5.5M 0.0 0.0 0.0 0 0( 0) 0 0 HT20/LGI ABCDP MCS0 0.0 0.0 0.0 1 0( 0) 0 0 - After (75 ch): type rate tpt eprob *prob ret *ok(*cum) ok( cum) CCK/LP 5.5M 0.0 0.0 0.0 0 0( 0) 0( 0) HT20/LGI ABCDP MCS0 0.0 0.0 0.0 1 0( 0) 0( 0) - Align non-HT format Before (non-HT) (83 ch): rate throughput ewma prob this prob this succ/attempt success attempts ABCDP 6 0.0 0.0 0.0 0( 0) 0 0 54 0.0 0.0 0.0 0( 0) 0 0 - After (61 ch): rate tpt eprob *prob *ok(*cum) ok( cum) ABCDP 1 0.0 0.0 0.0 0( 0) 0( 0) 54 0.0 0.0 0.0 0( 0) 0( 0) *This also adds dynamic checks for overflow, lowers the size of the non-HT request (allowing > 30 entries) and replaces the buddy-rounded allocations (s/sizeof(*ms) + 8192/8192). Signed-off-by: Karl Beldan Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_debugfs.c | 12 +++++++----- net/mac80211/rc80211_minstrel_ht_debugfs.c | 13 ++++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index edde723f9f00..2acab1bcaa4b 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -62,14 +62,14 @@ minstrel_stats_open(struct inode *inode, struct file *file) unsigned int i, tp, prob, eprob; char *p; - ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL); + ms = kmalloc(2048, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; - p += sprintf(p, "rate throughput ewma prob this prob " - "this succ/attempt success attempts\n"); + p += sprintf(p, "rate tpt eprob *prob" + " *ok(*cum) ok( cum)\n"); for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; @@ -86,8 +86,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); eprob = MINSTREL_TRUNC(mrs->probability * 1000); - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - " %3u(%3u) %8llu %8llu\n", + p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u" + " %4u(%4u) %9llu(%9llu)\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, @@ -102,6 +102,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) mi->sample_packets); ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 2048); + return 0; } diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index a72ad46f2a04..d537bec93754 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -63,8 +63,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) prob = MINSTREL_TRUNC(mr->cur_prob * 1000); eprob = MINSTREL_TRUNC(mr->probability * 1000); - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u %3u(%3u) %8llu %8llu\n", + p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u " + "%3u %4u(%4u) %9llu(%9llu)\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, @@ -96,14 +96,15 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) return ret; } - ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + ms = kmalloc(8192, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; - p += sprintf(p, "type rate throughput ewma prob " - "this prob retry this succ/attempt success attempts\n"); + p += sprintf(p, "type rate tpt eprob *prob " + "ret *ok(*cum) ok( cum)\n"); + p = minstrel_ht_stats_dump(mi, max_mcs, p); for (i = 0; i < max_mcs; i++) @@ -118,6 +119,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 8192); + return nonseekable_open(inode, file); } -- cgit v1.2.3-58-ga151 From 1e16aa3ddf863c6b9f37eddf52503230a62dedb3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Oct 2014 13:49:16 +0200 Subject: net: gso: use feature flag argument in all protocol gso handlers skb_gso_segment() has a 'features' argument representing offload features available to the output path. A few handlers, e.g. GRE, instead re-fetch the features of skb->dev and use those instead of the provided ones when handing encapsulation/tunnels. Depending on dev->hw_enc_features of the output device skb_gso_segment() can then return NULL even when the caller has disabled all GSO feature bits, as segmentation of inner header thinks device will take care of segmentation. This e.g. affects the tbf scheduler, which will silently drop GRE-encap GSO skbs that did not fit the remaining token quota as the segmentation does not work when device supports corresponding hw offload capabilities. Cc: Pravin B Shelar Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/ip6_offload.c | 2 +- net/mpls/mpls_gso.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92db7a69f2b9..8b7fe5b03906 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,7 +1246,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ccda09628de7..f6e345c0bc23 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -68,7 +68,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = skb_mac_gso_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 507310ef4b56..6480cea7aa53 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -58,7 +58,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = gso_inner_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 91014d32488d..a071563a7e6e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -90,7 +90,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); ipv6h = ipv6_hdr(skb); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index e28ed2ef5b06..f0f5309a2d72 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -48,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __skb_push(skb, skb->mac_len); /* Segment inner packet. */ - mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + mpls_features = skb->dev->mpls_features & features; segs = skb_mac_gso_segment(skb, mpls_features); -- cgit v1.2.3-58-ga151 From 330966e501ffe282d7184fde4518d5e0c24bc7f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Oct 2014 13:49:17 +0200 Subject: net: make skb_gso_segment error handling more robust skb_gso_segment has three possible return values: 1. a pointer to the first segmented skb 2. an errno value (IS_ERR()) 3. NULL. This can happen when GSO is used for header verification. However, several callers currently test IS_ERR instead of IS_ERR_OR_NULL and would oops when NULL is returned. Note that these call sites should never actually see such a NULL return value; all callers mask out the GSO bits in the feature argument. However, there have been issues with some protocol handlers erronously not respecting the specified feature mask in some cases. It is preferable to get 'have to turn off hw offloading, else slow' reports rather than 'kernel crashes'. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/netfilter/nfnetlink_queue_core.c | 2 +- net/openvswitch/datapath.c | 2 ++ net/xfrm/xfrm_output.c | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 88e5ef2c7f51..bc6471d4abcd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -231,7 +231,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) */ features = netif_skb_features(skb); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { + if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index a82077d9f59b..7c60ccd61a3e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -665,7 +665,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) * returned by nf_queue. For instance, callers rely on -ECANCELED to * mean 'ignore this hook'. */ - if (IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out_err; queued = 0; err = 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2e31d9e7f4dc..e6d7255183eb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -324,6 +324,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); + if (segs == NULL) + return -EINVAL; /* Queue all of the segments. */ skb = segs; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 499d6c18a8ce..7c532856b398 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -157,6 +157,8 @@ static int xfrm_output_gso(struct sk_buff *skb) kfree_skb(skb); if (IS_ERR(segs)) return PTR_ERR(segs); + if (segs == NULL) + return -EINVAL; do { struct sk_buff *nskb = segs->next; -- cgit v1.2.3-58-ga151 From f993bc25e5196e60514c216d0bca0f600de64af8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Oct 2014 13:49:18 +0200 Subject: net: core: handle encapsulation offloads when computing segment lengths if ->encapsulation is set we have to use inner_tcp_hdrlen and add the size of the inner network headers too. This is 'mostly harmless'; tbf might send skb that is slightly over quota or drop skb even if it would have fit. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/skbuff.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 61059a05ec95..c16615bfb61e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4070,15 +4070,22 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int thlen = 0; - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - return tcp_hdrlen(skb) + shinfo->gso_size; + if (skb->encapsulation) { + thlen = skb_inner_transport_header(skb) - + skb_transport_header(skb); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + thlen += inner_tcp_hdrlen(skb); + } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + thlen = tcp_hdrlen(skb); + } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already * accounted for. */ - return shinfo->gso_size; + return thlen + shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); -- cgit v1.2.3-58-ga151 From b6931c9ba728d60c542c39ff037fe6f595c074a2 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Sun, 19 Oct 2014 14:20:27 +0530 Subject: enic: fix possible deadlock in enic_stop/ enic_rfs_flw_tbl_free The following warning is shown when spinlock debug is enabled. This occurs when enic_flow_may_expire timer function is running and enic_stop is called on same CPU. Fix this by using spink_lock_bh(). ================================= [ INFO: inconsistent lock state ] 3.17.0-netnext-05504-g59f35b8 #268 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. ifconfig/443 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&enic->rfs_h.lock)->rlock){+.?...}, at: enic_rfs_flw_tbl_free+0x34/0xd0 [enic] {IN-SOFTIRQ-W} state was registered at: [] __lock_acquire+0x83f/0x21c0 [] lock_acquire+0xa2/0xd0 [] _raw_spin_lock+0x3c/0x80 [] enic_flow_may_expire+0x25/0x130[enic] [] call_timer_fn+0x77/0x100 [] run_timer_softirq+0x1e3/0x270 [] __do_softirq+0x14e/0x280 [] irq_exit+0x8e/0xb0 [] smp_apic_timer_interrupt+0x3f/0x50 [] apic_timer_interrupt+0x72/0x80 [] default_idle+0x13/0x20 [] arch_cpu_idle+0xa/0x10 [] cpu_startup_entry+0x2c6/0x330 [] start_secondary+0x21d/0x290 irq event stamp: 2997 hardirqs last enabled at (2997): [] _raw_spin_unlock_irqrestore+0x65/0x90 hardirqs last disabled at (2996): [] _raw_spin_lock_irqsave+0x26/0x90 softirqs last enabled at (2968): [] dev_deactivate_many+0x213/0x260 softirqs last disabled at (2966): [] dev_deactivate_many+0x1f3/0x260 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&enic->rfs_h.lock)->rlock); lock(&(&enic->rfs_h.lock)->rlock); *** DEADLOCK *** Reported-by: Jan Stancek Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_clsf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 69dfd3c9e529..0be6850be8a2 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -86,7 +86,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic) int i; enic_rfs_timer_stop(enic); - spin_lock(&enic->rfs_h.lock); + spin_lock_bh(&enic->rfs_h.lock); enic->rfs_h.free = 0; for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) { struct hlist_head *hhead; @@ -100,7 +100,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic) kfree(n); } } - spin_unlock(&enic->rfs_h.lock); + spin_unlock_bh(&enic->rfs_h.lock); } struct enic_rfs_fltr_node *htbl_fltr_search(struct enic *enic, u16 fltr_id) @@ -128,7 +128,7 @@ void enic_flow_may_expire(unsigned long data) bool res; int j; - spin_lock(&enic->rfs_h.lock); + spin_lock_bh(&enic->rfs_h.lock); for (j = 0; j < ENIC_CLSF_EXPIRE_COUNT; j++) { struct hlist_head *hhead; struct hlist_node *tmp; @@ -148,7 +148,7 @@ void enic_flow_may_expire(unsigned long data) } } } - spin_unlock(&enic->rfs_h.lock); + spin_unlock_bh(&enic->rfs_h.lock); mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4); } @@ -183,7 +183,7 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -EPROTONOSUPPORT; tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK; - spin_lock(&enic->rfs_h.lock); + spin_lock_bh(&enic->rfs_h.lock); n = htbl_key_search(&enic->rfs_h.ht_head[tbl_idx], &keys); if (n) { /* entry already present */ @@ -277,7 +277,7 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, } ret_unlock: - spin_unlock(&enic->rfs_h.lock); + spin_unlock_bh(&enic->rfs_h.lock); return res; } -- cgit v1.2.3-58-ga151 From 39dc90c159c1bcc0fdd42913a7d560b1a1cd3acf Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Sun, 19 Oct 2014 14:20:28 +0530 Subject: enic: Do not call napi_disable when preemption is disabled. In enic_stop, we disable preemption using local_bh_disable(). We disable preemption to wait for busy_poll to finish. napi_disable should not be called here as it might sleep. Moving napi_disable() call out side of local_bh_disable. BUG: sleeping function called from invalid context at include/linux/netdevice.h:477 in_atomic(): 1, irqs_disabled(): 0, pid: 443, name: ifconfig INFO: lockdep is turned off. Preemption disabled at:[] enic_rfs_flw_tbl_free+0x34/0xd0 [enic] CPU: 31 PID: 443 Comm: ifconfig Not tainted 3.17.0-netnext-05504-g59f35b8 #268 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff8800dac10000 ffff88020b8dfcb8 ffffffff8148a57c 0000000000000000 ffff88020b8dfcd0 ffffffff8107e253 ffff8800dac12a40 ffff88020b8dfd10 ffffffffa029305b ffff88020b8dfd48 ffff8800dac10000 ffff88020b8dfd48 Call Trace: [] dump_stack+0x4e/0x7a [] __might_sleep+0x123/0x1a0 [] enic_stop+0xdb/0x4d0 [enic] [] __dev_close_many+0x9d/0xf0 [] __dev_close+0x31/0x50 [] __dev_change_flags+0x98/0x160 [] dev_change_flags+0x24/0x60 [] devinet_ioctl+0x63d/0x710 [] ? might_fault+0x56/0xc0 [] inet_ioctl+0x65/0x90 [] sock_do_ioctl+0x20/0x50 [] sock_ioctl+0x20b/0x2e0 [] do_vfs_ioctl+0x2e0/0x500 [] ? sysret_check+0x22/0x5d [] ? __this_cpu_preempt_check+0x13/0x20 [] ? trace_hardirqs_on_caller+0x119/0x270 [] SyS_ioctl+0x3c/0x80 [] system_call_fastpath+0x1a/0x1f Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 929bfe70080a..180e53fa628f 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1674,13 +1674,13 @@ static int enic_stop(struct net_device *netdev) enic_dev_disable(enic); - local_bh_disable(); for (i = 0; i < enic->rq_count; i++) { napi_disable(&enic->napi[i]); + local_bh_disable(); while (!enic_poll_lock_napi(&enic->rq[i])) mdelay(1); + local_bh_enable(); } - local_bh_enable(); netif_carrier_off(netdev); netif_tx_disable(netdev); -- cgit v1.2.3-58-ga151 From 7b8613e0a1502b43b3b36c93c66f835c891f63b3 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 20 Oct 2014 14:44:25 +0800 Subject: tipc: fix a potential deadlock Locking dependency detected below possible unsafe locking scenario: CPU0 CPU1 T0: tipc_named_rcv() tipc_rcv() T1: [grab nametble write lock]* [grab node lock]* T2: tipc_update_nametbl() tipc_node_link_up() T3: tipc_nodesub_subscribe() tipc_nametbl_publish() T4: [grab node lock]* [grab nametble write lock]* The opposite order of holding nametbl write lock and node lock on above two different paths may result in a deadlock. If we move the the updating of the name table after link state named out of node lock, the reverse order of holding locks will be eliminated, and as a result, the deadlock risk. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 46 ++++++++++++++++++++++++++++------------------ net/tipc/node.h | 7 ++++++- net/tipc/socket.c | 2 +- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 90cee4a6fce4..5781634e957d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -219,11 +219,11 @@ void tipc_node_abort_sock_conns(struct list_head *conns) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; - u32 addr = n_ptr->addr; n_ptr->working_links++; - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, - l_ptr->bearer_id, addr); + n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; + pr_info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->net_plane); @@ -284,10 +284,10 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; - u32 addr = n_ptr->addr; n_ptr->working_links--; - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); + n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", @@ -552,28 +552,30 @@ void tipc_node_unlock(struct tipc_node *node) LIST_HEAD(conn_sks); struct sk_buff_head waiting_sks; u32 addr = 0; - unsigned int flags = node->action_flags; + int flags = node->action_flags; + u32 link_id = 0; - if (likely(!node->action_flags)) { + if (likely(!flags)) { spin_unlock_bh(&node->lock); return; } + addr = node->addr; + link_id = node->link_id; __skb_queue_head_init(&waiting_sks); - if (node->action_flags & TIPC_WAKEUP_USERS) { + + if (flags & TIPC_WAKEUP_USERS) skb_queue_splice_init(&node->waiting_sks, &waiting_sks); - node->action_flags &= ~TIPC_WAKEUP_USERS; - } - if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { + + if (flags & TIPC_NOTIFY_NODE_DOWN) { list_replace_init(&node->nsub, &nsub_list); list_replace_init(&node->conn_sks, &conn_sks); - node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; } - if (node->action_flags & TIPC_NOTIFY_NODE_UP) { - node->action_flags &= ~TIPC_NOTIFY_NODE_UP; - addr = node->addr; - } - node->action_flags &= ~TIPC_WAKEUP_BCAST_USERS; + node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN | + TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | + TIPC_NOTIFY_LINK_DOWN | + TIPC_WAKEUP_BCAST_USERS); + spin_unlock_bh(&node->lock); while (!skb_queue_empty(&waiting_sks)) @@ -588,6 +590,14 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_WAKEUP_BCAST_USERS) tipc_bclink_wakeup_users(); - if (addr) + if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + link_id, addr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index 67513c3c852c..04e91458bb29 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -53,6 +53,7 @@ * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), @@ -60,7 +61,9 @@ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), TIPC_WAKEUP_USERS = (1 << 5), - TIPC_WAKEUP_BCAST_USERS = (1 << 6) + TIPC_WAKEUP_BCAST_USERS = (1 << 6), + TIPC_NOTIFY_LINK_UP = (1 << 7), + TIPC_NOTIFY_LINK_DOWN = (1 << 8) }; /** @@ -100,6 +103,7 @@ struct tipc_node_bclink { * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any * @nsub: list of "node down" subscriptions monitoring node * @rcu: rcu struct for tipc_node */ @@ -116,6 +120,7 @@ struct tipc_node { int link_cnt; int working_links; u32 signature; + u32 link_id; struct list_head nsub; struct sk_buff_head waiting_sks; struct list_head conn_sks; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 75275c5cf929..3043f10dc328 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2673,7 +2673,7 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; -- cgit v1.2.3-58-ga151 From 1a194c2d59c55c37cb4c0c459d5418071a141341 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 20 Oct 2014 14:46:35 +0800 Subject: tipc: fix lockdep warning when intra-node messages are delivered When running tipcTC&tipcTS test suite, below lockdep unsafe locking scenario is reported: [ 1109.997854] [ 1109.997988] ================================= [ 1109.998290] [ INFO: inconsistent lock state ] [ 1109.998575] 3.17.0-rc1+ #113 Not tainted [ 1109.998762] --------------------------------- [ 1109.998762] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 1109.998762] swapper/7/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 1109.998762] (slock-AF_TIPC){+.?...}, at: [] tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] {SOFTIRQ-ON-W} state was registered at: [ 1109.998762] [] __lock_acquire+0x6a0/0x1d80 [ 1109.998762] [] lock_acquire+0x95/0x1e0 [ 1109.998762] [] _raw_spin_lock+0x3e/0x80 [ 1109.998762] [] tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] tipc_link_xmit+0xa8/0xc0 [tipc] [ 1109.998762] [] tipc_sendmsg+0x15f/0x550 [tipc] [ 1109.998762] [] tipc_connect+0x105/0x140 [tipc] [ 1109.998762] [] SYSC_connect+0xae/0xc0 [ 1109.998762] [] SyS_connect+0xe/0x10 [ 1109.998762] [] compat_SyS_socketcall+0xb8/0x200 [ 1109.998762] [] sysenter_dispatch+0x7/0x1f [ 1109.998762] irq event stamp: 241060 [ 1109.998762] hardirqs last enabled at (241060): [] __local_bh_enable_ip+0x6d/0xd0 [ 1109.998762] hardirqs last disabled at (241059): [] __local_bh_enable_ip+0x2f/0xd0 [ 1109.998762] softirqs last enabled at (241020): [] _local_bh_enable+0x22/0x50 [ 1109.998762] softirqs last disabled at (241021): [] irq_exit+0x96/0xc0 [ 1109.998762] [ 1109.998762] other info that might help us debug this: [ 1109.998762] Possible unsafe locking scenario: [ 1109.998762] [ 1109.998762] CPU0 [ 1109.998762] ---- [ 1109.998762] lock(slock-AF_TIPC); [ 1109.998762] [ 1109.998762] lock(slock-AF_TIPC); [ 1109.998762] [ 1109.998762] *** DEADLOCK *** [ 1109.998762] [ 1109.998762] 2 locks held by swapper/7/0: [ 1109.998762] #0: (rcu_read_lock){......}, at: [] __netif_receive_skb_core+0x69/0xb70 [ 1109.998762] #1: (rcu_read_lock){......}, at: [] tipc_l2_rcv_msg+0x40/0x260 [tipc] [ 1109.998762] [ 1109.998762] stack backtrace: [ 1109.998762] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 3.17.0-rc1+ #113 [ 1109.998762] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 1109.998762] ffffffff82745830 ffff880016c03828 ffffffff81a209eb 0000000000000007 [ 1109.998762] ffff880017b3cac0 ffff880016c03888 ffffffff81a1c5ef 0000000000000001 [ 1109.998762] ffff880000000001 ffff880000000000 ffffffff81012d4f 0000000000000000 [ 1109.998762] Call Trace: [ 1109.998762] [] dump_stack+0x4e/0x68 [ 1109.998762] [] print_usage_bug+0x1f1/0x202 [ 1109.998762] [] ? save_stack_trace+0x2f/0x50 [ 1109.998762] [] mark_lock+0x28c/0x2f0 [ 1109.998762] [] ? print_irq_inversion_bug.part.46+0x1f0/0x1f0 [ 1109.998762] [] __lock_acquire+0x5ad/0x1d80 [ 1109.998762] [] ? trace_hardirqs_on+0xd/0x10 [ 1109.998762] [] ? sched_clock_cpu+0x98/0xc0 [ 1109.998762] [] ? local_clock+0x1b/0x30 [ 1109.998762] [] ? lock_release_holdtime.part.29+0x1c/0x1a0 [ 1109.998762] [] ? sched_clock_local+0x25/0x90 [ 1109.998762] [] ? tipc_sk_get+0x60/0x80 [tipc] [ 1109.998762] [] lock_acquire+0x95/0x1e0 [ 1109.998762] [] ? tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] ? trace_hardirqs_on_caller+0xa6/0x1c0 [ 1109.998762] [] _raw_spin_lock+0x3e/0x80 [ 1109.998762] [] ? tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] ? tipc_sk_get+0x60/0x80 [tipc] [ 1109.998762] [] tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] tipc_rcv+0x5ed/0x960 [tipc] [ 1109.998762] [] tipc_l2_rcv_msg+0xcc/0x260 [tipc] [ 1109.998762] [] ? tipc_l2_rcv_msg+0x40/0x260 [tipc] [ 1109.998762] [] __netif_receive_skb_core+0x5e5/0xb70 [ 1109.998762] [] ? __netif_receive_skb_core+0x69/0xb70 [ 1109.998762] [] ? dev_gro_receive+0x259/0x4e0 [ 1109.998762] [] __netif_receive_skb+0x26/0x70 [ 1109.998762] [] netif_receive_skb_internal+0x2d/0x1f0 [ 1109.998762] [] napi_gro_receive+0xd8/0x240 [ 1109.998762] [] e1000_clean_rx_irq+0x2c4/0x530 [ 1109.998762] [] e1000_clean+0x266/0x9c0 [ 1109.998762] [] ? local_clock+0x1b/0x30 [ 1109.998762] [] ? sched_clock_local+0x25/0x90 [ 1109.998762] [] net_rx_action+0x141/0x310 [ 1109.998762] [] ? handle_fasteoi_irq+0xe0/0x150 [ 1109.998762] [] __do_softirq+0x116/0x4d0 [ 1109.998762] [] irq_exit+0x96/0xc0 [ 1109.998762] [] do_IRQ+0x67/0x110 [ 1109.998762] [] common_interrupt+0x6f/0x6f [ 1109.998762] [] ? default_idle+0x37/0x250 [ 1109.998762] [] ? default_idle+0x35/0x250 [ 1109.998762] [] arch_cpu_idle+0xf/0x20 [ 1109.998762] [] cpu_startup_entry+0x27d/0x4d0 [ 1109.998762] [] start_secondary+0x188/0x1f0 When intra-node messages are delivered from one process to another process, tipc_link_xmit() doesn't disable BH before it directly calls tipc_sk_rcv() on process context to forward messages to destination socket. Meanwhile, if messages delivered by remote node arrive at the node and their destinations are also the same socket, tipc_sk_rcv() running on process context might be preempted by tipc_sk_rcv() running BH context. As a result, the latter cannot obtain the socket lock as the lock was obtained by the former, however, the former has no chance to be run as the latter is owning the CPU now, so headlock happens. To avoid it, BH should be always disabled in tipc_sk_rcv(). Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3043f10dc328..51bddc236a15 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1776,7 +1776,7 @@ int tipc_sk_rcv(struct sk_buff *buf) sk = &tsk->sk; /* Queue message */ - bh_lock_sock(sk); + spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user(sk)) { rc = filter_rcv(sk, buf); @@ -1787,7 +1787,7 @@ int tipc_sk_rcv(struct sk_buff *buf) if (sk_add_backlog(sk, buf, limit)) rc = -TIPC_ERR_OVERLOAD; } - bh_unlock_sock(sk); + spin_unlock_bh(&sk->sk_lock.slock); tipc_sk_put(tsk); if (likely(!rc)) return 0; -- cgit v1.2.3-58-ga151 From 78fd1d0ab072d4d9b5f0b7c14a1516665170b565 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Oct 2014 22:05:38 +0200 Subject: netlink: Re-add locking to netlink_lookup() and seq walker The synchronize_rcu() in netlink_release() introduces unacceptable latency. Reintroduce minimal lookup so we can drop the synchronize_rcu() until socket destruction has been RCUfied. Cc: David S. Miller Cc: Eric Dumazet Reported-by: Steinar H. Gunderson Reported-and-tested-by: Heiko Carstens Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7a186e74b1b3..f1de72de273e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,14 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); +/* nl_table locking explained: + * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock + * combined with an RCU read-side lock. Insertion and removal are protected + * with nl_sk_hash_lock while using RCU list modification primitives and may + * run in parallel to nl_table_lock protected lookups. Destruction of the + * Netlink socket may only occur *after* nl_table_lock has been acquired + * either during or after the socket has been removed from the list. + */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -109,10 +117,10 @@ EXPORT_SYMBOL_GPL(nl_sk_hash_lock); static int lockdep_nl_sk_hash_is_held(void) { #ifdef CONFIG_LOCKDEP - return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1; -#else - return 1; + if (debug_locks) + return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); #endif + return 1; } static ATOMIC_NOTIFIER_HEAD(netlink_chain); @@ -1028,11 +1036,13 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct netlink_table *table = &nl_table[protocol]; struct sock *sk; + read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); + read_unlock(&nl_table_lock); return sk; } @@ -1257,9 +1267,6 @@ static int netlink_release(struct socket *sock) } netlink_table_ungrab(); - /* Wait for readers to complete */ - synchronize_net(); - kfree(nlk->groups); nlk->groups = NULL; @@ -1281,6 +1288,7 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); + netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1288,9 +1296,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); + netlink_table_ungrab(); goto retry; } rcu_read_unlock(); + netlink_table_ungrab(); err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) @@ -2921,14 +2931,16 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(nl_table_lock) __acquires(RCU) { + read_lock(&nl_table_lock); rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct rhashtable *ht; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2943,19 +2955,19 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter = seq->private; nlk = v; - rht_for_each_entry_rcu(nlk, nlk->node.next, node) + i = iter->link; + ht = &nl_table[i].hash; + rht_for_each_entry(nlk, nlk->node.next, ht, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; - i = iter->link; j = iter->hash_idx + 1; do { - struct rhashtable *ht = &nl_table[i].hash; const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; @@ -2971,9 +2983,10 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(RCU) __releases(nl_table_lock) { rcu_read_unlock(); + read_unlock(&nl_table_lock); } -- cgit v1.2.3-58-ga151 From 32bf08a6257b9c7380dcd040af3c0858eee3ef05 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 20 Oct 2014 14:54:57 -0700 Subject: bpf: fix bug in eBPF verifier while comparing for verifier state equivalency the comparison was missing a check for uninitialized register. Make sure it does so and add a testcase. Fixes: f1bca824dabb ("bpf: add search pruning optimization to verifier") Cc: Hannes Frederic Sowa Signed-off-by: Alexei Starovoitov Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 3 ++- samples/bpf/test_verifier.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 801f5f3b9307..9f81818f2941 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1409,7 +1409,8 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) if (memcmp(&old->regs[i], &cur->regs[i], sizeof(old->regs[0])) != 0) { if (old->regs[i].type == NOT_INIT || - old->regs[i].type == UNKNOWN_VALUE) + (old->regs[i].type == UNKNOWN_VALUE && + cur->regs[i].type != NOT_INIT)) continue; return false; } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index f44ef11f65a7..eb4bec0ad8af 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -208,6 +208,17 @@ static struct bpf_test tests[] = { .errstr = "R0 !read_ok", .result = REJECT, }, + { + "program doesn't init R0 before exit in all branches", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + }, { "stack out of bounds", .insns = { -- cgit v1.2.3-58-ga151 From 7c1c97d54f9bfc810908d3903cb8bcacf734df18 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 21 Oct 2014 11:23:30 +0200 Subject: net: sched: initialize bstats syncp Use netdev_alloc_pcpu_stats to allocate percpu stats and initialize syncp. Fixes: 22e0f8b9322c "net: sched: make bstats per cpu and estimator RCU safe" Signed-off-by: Sabrina Dubroca Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cf61b3e633c..76f402e05bd6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -947,7 +947,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (qdisc_is_percpu_stats(sch)) { sch->cpu_bstats = - alloc_percpu(struct gnet_stats_basic_cpu); + netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); if (!sch->cpu_bstats) goto err_out4; -- cgit v1.2.3-58-ga151 From e37ad9fd636071e45368d1d9cc3b7b421281ce7f Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Mon, 13 Oct 2014 13:09:28 -0300 Subject: netfilter: nf_conntrack: allow server to become a client in TW handling When a port that was used to listen for inbound connections gets closed and reused for outgoing connections (like rsh ends up doing for stderr flow), current we may reject the SYN/ACK packet for the new connection because tcp_conntracks states forbirds a port to become a client while there is still a TIME_WAIT entry in there for it. As TCP may expire the TIME_WAIT socket in 60s and conntrack's timeout for it is 120s, there is a ~60s window that the application can end up opening a port that conntrack will end up blocking. This patch fixes this by simply allowing such state transition: if we see a SYN, in TIME_WAIT state, on REPLY direction, move it to sSS. Note that the rest of the code already handles this situation, more specificly in tcp_packet(), first switch clause. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44d1ea32570a..d87b6423ffb2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, /* * sNO -> sIV Never reached. * sSS -> sS2 Simultaneous open @@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sFW -> sIV * sCW -> sIV * sLA -> sIV - * sTW -> sIV Reopened connection, but server may not do it. + * sTW -> sSS Reopened connection, but server may have switched role * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -- cgit v1.2.3-58-ga151 From 0f9f5e1b83abd2b37c67658e02a6fc9001831fa5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Oct 2014 11:28:12 +0300 Subject: netfilter: ipset: off by one in ip_set_nfnl_get_byindex() The ->ip_set_list[] array is initialized in ip_set_net_init() and it has ->ip_set_max elements so this check should be >= instead of > otherwise we are off by one. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 912e5a05b79d..86f9d76b1464 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -659,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - if (index > inst->ip_set_max) + if (index >= inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); -- cgit v1.2.3-58-ga151 From c123bb7163043bb8f33858cf8e45b01c17dbd171 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 21 Oct 2014 11:08:21 +0200 Subject: netfilter: nf_tables: check for NULL in nf_tables_newchain pcpu stats allocation alloc_percpu returns NULL on failure, not a negative error code. Fixes: ff3cd7b3c922 ("netfilter: nf_tables: refactor chain statistic routines") Signed-off-by: Sabrina Dubroca Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65eb2a1160d5..11ab4b078f3b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1328,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (IS_ERR(stats)) { + if (stats == NULL) { module_put(type->owner); kfree(basechain); - return PTR_ERR(stats); + return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } -- cgit v1.2.3-58-ga151 From 8fc963515e893867330dec87464e9edc5204c024 Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Tue, 21 Oct 2014 14:50:29 +0100 Subject: sfc: remove incorrect EFX_BUG_ON_PARANOID check write_count and insert_count can wrap around, making > check invalid. Fixes: 70b33fb0ddec827cbbd14cdc664fc27b2ef4a6b6 ("sfc: add support for skb->xmit_more"). Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/tx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index ee84a90e371c..aaf2987512b5 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -343,8 +343,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) unsigned short dma_flags; int i = 0; - EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count); - if (skb_shinfo(skb)->gso_size) return efx_enqueue_skb_tso(tx_queue, skb); @@ -1258,8 +1256,6 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Find the packet protocol and sanity-check it */ state.protocol = efx_tso_check_protocol(skb); - EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count); - rc = tso_start(&state, efx, skb); if (rc) goto mem_err; -- cgit v1.2.3-58-ga151 From a63ba13eec092b70d4e5522d692eaeb2f9747387 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Tue, 21 Oct 2014 16:06:05 +0200 Subject: net: tso: fix unaligned access to crafted TCP header in helper API The crafted header start address is from a driver supplied buffer, which one can reasonably expect to be aligned on a 4-bytes boundary. However ATM the TSO helper API is only used by ethernet drivers and the tcp header will then be aligned to a 2-bytes only boundary from the header start address. Signed-off-by: Karl Beldan Cc: Ezequiel Garcia Signed-off-by: David S. Miller --- net/core/tso.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/tso.c b/net/core/tso.c index 8c3203c585b0..630b30b4fb53 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,6 +1,7 @@ #include #include #include +#include /* Calculate expected number of TX descriptors */ int tso_count_descs(struct sk_buff *skb) @@ -23,7 +24,7 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, iph->id = htons(tso->ip_id); iph->tot_len = htons(size + hdr_len - mac_hdr_len); tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); - tcph->seq = htonl(tso->tcp_seq); + put_unaligned_be32(tso->tcp_seq, &tcph->seq); tso->ip_id++; if (!is_last) { -- cgit v1.2.3-58-ga151 From 8751b12cd93cc37337256f650e309b8364d40b35 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Oct 2014 09:05:47 +0200 Subject: net: fs_enet: set back promiscuity mode after restart After interface restart (eg: after link disconnection/reconnection), the bridge function doesn't work anymore. This is due to the promiscuous mode being cleared by the restart. The mac-fcc already includes code to set the promiscuous mode back during the restart. This patch adds the same handling to mac-fec and mac-scc. Tested with bridge function on MPC885 with FEC. Reported-by: Germain Montoies Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 3 +++ drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 3d4e08be1709..b34214e2df5f 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -341,6 +341,9 @@ static void restart(struct net_device *dev) FC(fecp, x_cntrl, FEC_TCNTRL_FDEN); /* FD disable */ } + /* Restore multicast and promiscuous settings */ + set_multicast_list(dev); + /* * Enable interrupts we wish to service. */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index f30411f0701f..7a184e8816a4 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -355,6 +355,9 @@ static void restart(struct net_device *dev) if (fep->phydev->duplex) S16(sccp, scc_psmr, SCC_PSMR_LPB | SCC_PSMR_FDE); + /* Restore multicast and promiscuous settings */ + set_multicast_list(dev); + S32(sccp, scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); } -- cgit v1.2.3-58-ga151 From 789f202326640814c52f82e80cef3584d8254623 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 22 Oct 2014 17:09:53 +0800 Subject: xfrm6: fix a potential use after free in xfrm6_policy.c pskb_may_pull() maybe change skb->data and make nh and exthdr pointer oboslete, so recompute the nd and exthdr Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ac49f84fe2c3..5f983644373a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -170,8 +170,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (!onlyproto && (nh + offset + 4 < skb->data || pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports = (__be16 *)exthdr; + __be16 *ports; + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); fl6->fl6_sport = ports[!!reverse]; fl6->fl6_dport = ports[!reverse]; } @@ -180,8 +182,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { - u8 *icmp = (u8 *)exthdr; + u8 *icmp; + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); fl6->fl6_icmp_type = icmp[0]; fl6->fl6_icmp_code = icmp[1]; } @@ -192,8 +196,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; - mh = (struct ip6_mh *)exthdr; + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); fl6->fl6_mh_type = mh->ip6mh_type; } fl6->flowi6_proto = nexthdr; -- cgit v1.2.3-58-ga151 From 9e7ceb060754f134231f68cb29d5db31419fe1ed Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Wed, 22 Oct 2014 21:42:01 +0530 Subject: net: fix saving TX flow hash in sock for outgoing connections The commit "net: Save TX flow hash in sock and set in skbuf on xmit" introduced the inet_set_txhash() and ip6_set_txhash() routines to calculate and record flow hash(sk_txhash) in the socket structure. sk_txhash is used to set skb->hash which is used to spread flows across multiple TXQs. But, the above routines are invoked before the source port of the connection is created. Because of this all outgoing connections that just differ in the source port get hashed into the same TXQ. This patch fixes this problem for IPv4/6 by invoking the the above routines after the source port is available for the socket. Fixes: b73c3d0e4("net: Save TX flow hash in sock and set in skbuf on xmit") Signed-off-by: Sathya Perla Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94d1a7757ff7..9c7d7621466b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -206,8 +206,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; - inet_set_txhash(sk); - inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -224,6 +222,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; + inet_set_txhash(sk); + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 831495529b82..ace29b60813c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -200,8 +200,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; - ip6_set_txhash(sk); - /* * TCP over IPv4 */ @@ -297,6 +295,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto late_failure; + ip6_set_txhash(sk); + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, -- cgit v1.2.3-58-ga151 From 81f35ffde0e95ee18de83646bbf93dda55d9cc8b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 22 Oct 2014 16:34:35 +0200 Subject: net: fec: ptp: fix NULL pointer dereference if ptp_clock is not set Since commit 278d24047891 (net: fec: ptp: Enable PPS output based on ptp clock) fec_enet_interrupt calls fec_ptp_check_pps_event unconditionally, which calls into ptp_clock_event. If fep->ptp_clock is NULL, ptp_clock_event tries to dereference the NULL pointer. Since on i.MX53 fep->bufdesc_ex is not set, fec_ptp_init is never called, and fep->ptp_clock is NULL, which reliably causes a kernel panic. This patch adds a check for fep->ptp_clock == NULL in fec_enet_interrupt. Signed-off-by: Philipp Zabel Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 81b96cf87574..50a851db2852 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1581,7 +1581,8 @@ fec_enet_interrupt(int irq, void *dev_id) complete(&fep->mdio_done); } - fec_ptp_check_pps_event(fep); + if (fep->ptp_clock) + fec_ptp_check_pps_event(fep); return ret; } -- cgit v1.2.3-58-ga151 From 386f1c9650b7fe4849d2942bd42f41f0ca3aedfb Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 22 Oct 2014 11:26:11 -0500 Subject: amd-xgbe: Properly handle feature changes via ethtool The ndo_set_features callback function was improperly using an unsigned int to save the current feature value for features such as NETIF_F_RXCSUM. Since that feature is in the upper 32 bits of a 64 bit variable the result was always 0 making it not possible to actually turn off the hardware RX checksum support. Change the unsigned int type to the netdev_features_t type in order to properly capture the current value and perform the proper operation. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 29554992215a..a480b231ba37 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1465,7 +1465,7 @@ static int xgbe_set_features(struct net_device *netdev, { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned int rxcsum, rxvlan, rxvlan_filter; + netdev_features_t rxcsum, rxvlan, rxvlan_filter; rxcsum = pdata->netdev_features & NETIF_F_RXCSUM; rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX; -- cgit v1.2.3-58-ga151 From 55ca6bcd733b739d5667d48d7591899f376dcfb8 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 22 Oct 2014 11:26:17 -0500 Subject: amd-xgbe: Fix napi Rx budget accounting Currently the amd-xgbe driver increments the packets processed counter each time a descriptor is processed. Since a packet can be represented by more than one descriptor incrementing the counter in this way is not appropriate. Also, since multiple descriptors cause the budget check to be short circuited, sometimes the returned value from the poll function would be larger than the budget value resulting in a WARN_ONCE being triggered. Update the polling logic to properly account for the number of packets processed and exit when the budget value is reached. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index a480b231ba37..2349ea970255 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1598,7 +1598,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct skb_shared_hwtstamps *hwtstamps; unsigned int incomplete, error, context_next, context; unsigned int len, put_len, max_len; - int received = 0; + unsigned int received = 0; + int packet_count = 0; DBGPR("-->xgbe_rx_poll: budget=%d\n", budget); @@ -1608,7 +1609,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) rdata = XGBE_GET_DESC_DATA(ring, ring->cur); packet = &ring->packet_data; - while (received < budget) { + while (packet_count < budget) { DBGPR(" cur = %d\n", ring->cur); /* First time in loop see if we need to restore state */ @@ -1662,7 +1663,7 @@ read_again: if (packet->errors) DBGPR("Error in received packet\n"); dev_kfree_skb(skb); - continue; + goto next_packet; } if (!context) { @@ -1677,7 +1678,7 @@ read_again: } dev_kfree_skb(skb); - continue; + goto next_packet; } memcpy(skb_tail_pointer(skb), rdata->skb->data, put_len); @@ -1694,7 +1695,7 @@ read_again: /* Stray Context Descriptor? */ if (!skb) - continue; + goto next_packet; /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; @@ -1705,7 +1706,7 @@ read_again: if (skb->len > max_len) { DBGPR("packet length exceeds configured MTU\n"); dev_kfree_skb(skb); - continue; + goto next_packet; } #ifdef XGMAC_ENABLE_RX_PKT_DUMP @@ -1739,6 +1740,9 @@ read_again: netdev->last_rx = jiffies; napi_gro_receive(&pdata->napi, skb); + +next_packet: + packet_count++; } /* Check if we need to save state before leaving */ @@ -1752,9 +1756,9 @@ read_again: rdata->state.error = error; } - DBGPR("<--xgbe_rx_poll: received = %d\n", received); + DBGPR("<--xgbe_rx_poll: packet_count = %d\n", packet_count); - return received; + return packet_count; } static int xgbe_poll(struct napi_struct *napi, int budget) -- cgit v1.2.3-58-ga151 From 942396b01989d54977120f3625e5ba31afe7a75c Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 22 Oct 2014 13:47:18 -0700 Subject: hyperv: Fix the total_data_buflen in send path total_data_buflen is used by netvsc_send() to decide if a packet can be put into send buffer. It should also include the size of RNDIS message before the Ethernet frame. Otherwise, a messge with total size bigger than send_section_size may be copied into the send buffer, and cause data corruption. [Request to include this patch to the Stable branches] Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9e17d1a91e71..78ec33f5100b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -550,6 +550,7 @@ do_lso: do_send: /* Start filling in the page buffers with the rndis hdr */ rndis_msg->msg_len += rndis_msg_size; + packet->total_data_buflen = rndis_msg->msg_len; packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, &packet->page_buf[0]); -- cgit v1.2.3-58-ga151 From 78afe83c3b008e25123bd1be36ee4b6595e595d1 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 9 Oct 2014 23:39:41 +0200 Subject: bcma: fix build when CONFIG_OF_ADDRESS is not set Commit 2101e533f41a ("bcma: register bcma as device tree driver") introduces a hard dependency on OF_ADDRESS into the bcma driver. OF_ADDRESS is specifically disabled for the sparc architecture. This results in the following error when building sparc64:allmodconfig. drivers/bcma/main.c: In function 'bcma_of_find_child_device': drivers/bcma/main.c:150:3: error: implicit declaration of function 'of_translate_address' Fixes: 2101e533f41a ("bcma: register bcma as device tree driver") Reported-by: Guenter Roeck Signed-off-by: Hauke Mehrtens Reviewed-by: Guenter Roeck Signed-off-by: John W. Linville --- drivers/bcma/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index d1656c2f70af..1000955ce09d 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -132,7 +132,7 @@ static bool bcma_is_core_needed_early(u16 core_id) return false; } -#ifdef CONFIG_OF +#if defined(CONFIG_OF) && defined(CONFIG_OF_ADDRESS) static struct device_node *bcma_of_find_child_device(struct platform_device *parent, struct bcma_device *core) { -- cgit v1.2.3-58-ga151 From 47481d977cb2987ab363202c68a79ec1bccd357c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 11 Oct 2014 12:59:53 -0500 Subject: rtlwifi: rtl8192ee: Prevent log spamming for switch statements The driver logs a message when the default branch of switch statements are taken. Such information is useful when debugging, but these log items should not be seen for standard usage. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192ee/hw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c index dfdc9b20e4ad..1a87edca2c3f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ee/hw.c @@ -362,7 +362,7 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } break; default: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, + RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "switch case not process %x\n", variable); break; } @@ -591,7 +591,7 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) acm_ctrl &= (~ACMHW_BEQEN); break; default: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, + RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "switch case not process\n"); break; } @@ -710,7 +710,7 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } break; default: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, + RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "switch case not process %x\n", variable); break; } @@ -2424,7 +2424,7 @@ void rtl92ee_set_key(struct ieee80211_hw *hw, u32 key_index, enc_algo = CAM_AES; break; default: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, + RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, "switch case not process\n"); enc_algo = CAM_TKIP; break; -- cgit v1.2.3-58-ga151 From 59dfdd92288e55bed374309a9944c3a95b4e13c9 Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sun, 12 Oct 2014 13:42:14 +0200 Subject: brcmfmac: dhd_sdio.c: Cleaning up missing null-terminate in conjunction with strncpy Replacing strncpy with strlcpy to avoid strings that lacks null terminate. And changed from using strncat to strlcat to simplify code. Signed-off-by: Rickard Strandqvist Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c | 25 +++++++++++----------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index f55f625fd06b..d20d4e6f391a 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -670,7 +670,6 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci, struct brcmf_sdio_dev *sdiodev) { int i; - uint fw_len, nv_len; char end; for (i = 0; i < ARRAY_SIZE(brcmf_fwname_data); i++) { @@ -684,25 +683,25 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci, return -ENODEV; } - fw_len = sizeof(sdiodev->fw_name) - 1; - nv_len = sizeof(sdiodev->nvram_name) - 1; /* check if firmware path is provided by module parameter */ if (brcmf_firmware_path[0] != '\0') { - strncpy(sdiodev->fw_name, brcmf_firmware_path, fw_len); - strncpy(sdiodev->nvram_name, brcmf_firmware_path, nv_len); - fw_len -= strlen(sdiodev->fw_name); - nv_len -= strlen(sdiodev->nvram_name); + strlcpy(sdiodev->fw_name, brcmf_firmware_path, + sizeof(sdiodev->fw_name)); + strlcpy(sdiodev->nvram_name, brcmf_firmware_path, + sizeof(sdiodev->nvram_name)); end = brcmf_firmware_path[strlen(brcmf_firmware_path) - 1]; if (end != '/') { - strncat(sdiodev->fw_name, "/", fw_len); - strncat(sdiodev->nvram_name, "/", nv_len); - fw_len--; - nv_len--; + strlcat(sdiodev->fw_name, "/", + sizeof(sdiodev->fw_name)); + strlcat(sdiodev->nvram_name, "/", + sizeof(sdiodev->nvram_name)); } } - strncat(sdiodev->fw_name, brcmf_fwname_data[i].bin, fw_len); - strncat(sdiodev->nvram_name, brcmf_fwname_data[i].nv, nv_len); + strlcat(sdiodev->fw_name, brcmf_fwname_data[i].bin, + sizeof(sdiodev->fw_name)); + strlcat(sdiodev->nvram_name, brcmf_fwname_data[i].nv, + sizeof(sdiodev->nvram_name)); return 0; } -- cgit v1.2.3-58-ga151 From 34b6d4299923ec9101bbf364440cee36420b3fc0 Mon Sep 17 00:00:00 2001 From: RafaÅ‚ MiÅ‚ecki Date: Wed, 15 Oct 2014 07:51:44 +0200 Subject: bcma: add another PCI ID of device with BCM43228 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was found attached to the BCM47081A0 SoC. Log: bcma: bus0: Found chip with id 43228, rev 0x00 and package 0x08 Signed-off-by: RafaÅ‚ MiÅ‚ecki Signed-off-by: John W. Linville --- drivers/bcma/host_pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index 1e5ac0a79696..cd9161a8b3a1 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -275,7 +275,7 @@ static SIMPLE_DEV_PM_OPS(bcma_pm_ops, bcma_host_pci_suspend, static const struct pci_device_id bcma_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x0576) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4313) }, - { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 43224) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 43224) }, /* 0xa8d8 */ { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4331) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4353) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4357) }, @@ -285,7 +285,8 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4727) }, - { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 43227) }, /* 0xA8DB */ + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 43227) }, /* 0xa8db, BCM43217 (sic!) */ + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 43228) }, /* 0xa8dc */ { 0, }, }; MODULE_DEVICE_TABLE(pci, bcma_pci_bridge_tbl); -- cgit v1.2.3-58-ga151 From 598a0df07fc6c4642f9b0497cef1233e41d4c987 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 20 Oct 2014 14:57:08 -0700 Subject: rtlwifi: prevent format string usage from leaking Use "%s" in the workqueue allocation to make sure the rtl_hal_cfg name can never accidentally leak information via a format string. Signed-off-by: Kees Cook Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index 58ba71830886..40b6d1d006d7 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -467,7 +467,7 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw) rtl_easy_concurrent_retrytimer_callback, (unsigned long)hw); /* <2> work queue */ rtlpriv->works.hw = hw; - rtlpriv->works.rtl_wq = alloc_workqueue(rtlpriv->cfg->name, 0, 0); + rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name); INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq, (void *)rtl_watchdog_wq_callback); INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq, -- cgit v1.2.3-58-ga151 From 868caae3fe2e35e2353d86af95e03eeaa9439d97 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Tue, 21 Oct 2014 19:23:02 +0530 Subject: ath9k: Enable HW queue control only for MCC Enabling HW queue control for normal (non-mcc) mode causes problems with queue management, resulting in traffic stall. Since it is mainly required for fairness in MCC mode, disable it for the general case. Bug: https://dev.openwrt.org/ticket/18164 Cc: Felix Fietkau Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/init.c | 55 +++++++++++++++++++---------------- drivers/net/wireless/ath/ath9k/main.c | 3 ++ drivers/net/wireless/ath/ath9k/xmit.c | 10 +++++-- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index 156a944134dc..3bd030494986 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -734,6 +734,32 @@ static const struct ieee80211_iface_combination if_comb[] = { #endif }; +#ifdef CONFIG_ATH9K_CHANNEL_CONTEXT +static void ath9k_set_mcc_capab(struct ath_softc *sc, struct ieee80211_hw *hw) +{ + struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(ah); + + if (!ath9k_is_chanctx_enabled()) + return; + + hw->flags |= IEEE80211_HW_QUEUE_CONTROL; + hw->queues = ATH9K_NUM_TX_QUEUES; + hw->offchannel_tx_hw_queue = hw->queues - 1; + hw->wiphy->interface_modes &= ~ BIT(NL80211_IFTYPE_WDS); + hw->wiphy->iface_combinations = if_comb_multi; + hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb_multi); + hw->wiphy->max_scan_ssids = 255; + hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; + hw->wiphy->max_remain_on_channel_duration = 10000; + hw->chanctx_data_size = sizeof(void *); + hw->extra_beacon_tailroom = + sizeof(struct ieee80211_p2p_noa_attr) + 9; + + ath_dbg(common, CHAN_CTX, "Use channel contexts\n"); +} +#endif /* CONFIG_ATH9K_CHANNEL_CONTEXT */ + static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) { struct ath_hw *ah = sc->sc_ah; @@ -746,7 +772,6 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) IEEE80211_HW_SPECTRUM_MGMT | IEEE80211_HW_REPORTS_TX_ACK_STATUS | IEEE80211_HW_SUPPORTS_RC_TABLE | - IEEE80211_HW_QUEUE_CONTROL | IEEE80211_HW_SUPPORTS_HT_CCK_RATES; if (ath9k_ps_enable) @@ -781,24 +806,6 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb); } -#ifdef CONFIG_ATH9K_CHANNEL_CONTEXT - - if (ath9k_is_chanctx_enabled()) { - hw->wiphy->interface_modes &= ~ BIT(NL80211_IFTYPE_WDS); - hw->wiphy->iface_combinations = if_comb_multi; - hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb_multi); - hw->wiphy->max_scan_ssids = 255; - hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; - hw->wiphy->max_remain_on_channel_duration = 10000; - hw->chanctx_data_size = sizeof(void *); - hw->extra_beacon_tailroom = - sizeof(struct ieee80211_p2p_noa_attr) + 9; - - ath_dbg(common, CHAN_CTX, "Use channel contexts\n"); - } - -#endif /* CONFIG_ATH9K_CHANNEL_CONTEXT */ - hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; @@ -808,12 +815,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; - /* allow 4 queues per channel context + - * 1 cab queue + 1 offchannel tx queue - */ - hw->queues = ATH9K_NUM_TX_QUEUES; - /* last queue for offchannel */ - hw->offchannel_tx_hw_queue = hw->queues - 1; + hw->queues = 4; hw->max_rates = 4; hw->max_listen_interval = 10; hw->max_rate_tries = 10; @@ -837,6 +839,9 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw) hw->wiphy->bands[IEEE80211_BAND_5GHZ] = &common->sbands[IEEE80211_BAND_5GHZ]; +#ifdef CONFIG_ATH9K_CHANNEL_CONTEXT + ath9k_set_mcc_capab(sc, hw); +#endif ath9k_init_wow(hw); ath9k_cmn_reload_chainmask(ah); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 6f6a974f7fdb..30c66dfcd7a0 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1162,6 +1162,9 @@ static void ath9k_assign_hw_queues(struct ieee80211_hw *hw, { int i; + if (!ath9k_is_chanctx_enabled()) + return; + for (i = 0; i < IEEE80211_NUM_ACS; i++) vif->hw_queue[i] = i; diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 493a183d0aaf..d6e54a3c88f6 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -169,7 +169,10 @@ static void ath_txq_skb_done(struct ath_softc *sc, struct ath_txq *txq, if (txq->stopped && txq->pending_frames < sc->tx.txq_max_pending[q]) { - ieee80211_wake_queue(sc->hw, info->hw_queue); + if (ath9k_is_chanctx_enabled()) + ieee80211_wake_queue(sc->hw, info->hw_queue); + else + ieee80211_wake_queue(sc->hw, q); txq->stopped = false; } } @@ -2247,7 +2250,10 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, fi->txq = q; if (++txq->pending_frames > sc->tx.txq_max_pending[q] && !txq->stopped) { - ieee80211_stop_queue(sc->hw, info->hw_queue); + if (ath9k_is_chanctx_enabled()) + ieee80211_stop_queue(sc->hw, info->hw_queue); + else + ieee80211_stop_queue(sc->hw, q); txq->stopped = true; } } -- cgit v1.2.3-58-ga151 From d514aefb8ce89562ef2d7dcddc530e5de6287c4b Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 21 Oct 2014 10:52:51 -0500 Subject: rtlwifi: rtl8821ae: Fix possible array overrun The kbuild test robot reported a possible array overrun. The affected code checks for overruns, but fails to take the steps necessary to fix them. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8821ae/phy.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/rtlwifi/rtl8821ae/phy.c index 9786313dc62f..1e9570fa874f 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/phy.c @@ -1889,15 +1889,18 @@ static void _rtl8821ae_store_tx_power_by_rate(struct ieee80211_hw *hw, struct rtl_phy *rtlphy = &rtlpriv->phy; u8 rate_section = _rtl8821ae_get_rate_section_index(regaddr); - if (band != BAND_ON_2_4G && band != BAND_ON_5G) + if (band != BAND_ON_2_4G && band != BAND_ON_5G) { RT_TRACE(rtlpriv, COMP_INIT, DBG_WARNING, "Invalid Band %d\n", band); - - if (rfpath >= MAX_RF_PATH) + band = BAND_ON_2_4G; + } + if (rfpath >= MAX_RF_PATH) { RT_TRACE(rtlpriv, COMP_INIT, DBG_WARNING, "Invalid RfPath %d\n", rfpath); - - if (txnum >= MAX_RF_PATH) + rfpath = MAX_RF_PATH - 1; + } + if (txnum >= MAX_RF_PATH) { RT_TRACE(rtlpriv, COMP_INIT, DBG_WARNING, "Invalid TxNum %d\n", txnum); - + txnum = MAX_RF_PATH - 1; + } rtlphy->tx_power_by_rate_offset[band][rfpath][txnum][rate_section] = data; RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "TxPwrByRateOffset[Band %d][RfPath %d][TxNum %d][RateSection %d] = 0x%x\n", -- cgit v1.2.3-58-ga151 From 94e05900770c0abe31200881df93e41d296fe8bd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 22 Oct 2014 15:27:53 +0200 Subject: ath: use CTL region from cfg80211 if unset in EEPROM Many AP devices do not have the proper regulatory domain programmed in EEPROM. Instead they expect the software to set the appropriate region. For these devices, the country code defaults to US, and the driver uses the US CTL tables as well. On devices bought in Europe this can lead to tx power being set too high on the band edges, even if the cfg80211 regdomain is set correctly. Fix this issue by taking into account the DFS region, but only when the EEPROM regdomain is set to default. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath.h | 1 + drivers/net/wireless/ath/regd.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h index e5ba6faf3281..86907e5ba6ca 100644 --- a/drivers/net/wireless/ath/ath.h +++ b/drivers/net/wireless/ath/ath.h @@ -80,6 +80,7 @@ struct reg_dmn_pair_mapping { struct ath_regulatory { char alpha2[2]; + enum nl80211_dfs_regions region; u16 country_code; u16 max_power_level; u16 current_rd; diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c index 415393dfb6fc..06ea6cc9e30a 100644 --- a/drivers/net/wireless/ath/regd.c +++ b/drivers/net/wireless/ath/regd.c @@ -515,6 +515,7 @@ void ath_reg_notifier_apply(struct wiphy *wiphy, if (!request) return; + reg->region = request->dfs_region; switch (request->initiator) { case NL80211_REGDOM_SET_BY_CORE: /* @@ -779,6 +780,19 @@ u32 ath_regd_get_band_ctl(struct ath_regulatory *reg, return SD_NO_CTL; } + if (ath_regd_get_eepromRD(reg) == CTRY_DEFAULT) { + switch (reg->region) { + case NL80211_DFS_FCC: + return CTL_FCC; + case NL80211_DFS_ETSI: + return CTL_ETSI; + case NL80211_DFS_JP: + return CTL_MKK; + default: + break; + } + } + switch (band) { case IEEE80211_BAND_2GHZ: return reg->regpair->reg_2ghz_ctl; -- cgit v1.2.3-58-ga151 From b2d624a5810203a1a8b7735e1ec5685109b22fc3 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Wed, 22 Oct 2014 15:47:32 +0200 Subject: rtl8192cu: Fix for rtlwifi's bluetooth coexist functionality Initialize function pointer with a function indicating bt coexist is not there. Prevents Ooops. Signed-off-by: Karsten Wiese Acked-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192cu/sw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 7c5fbaf5fee0..e06bafee37f9 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -101,6 +101,12 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw) } } +/* get bt coexist status */ +static bool rtl92cu_get_btc_status(void) +{ + return false; +} + static struct rtl_hal_ops rtl8192cu_hal_ops = { .init_sw_vars = rtl92cu_init_sw_vars, .deinit_sw_vars = rtl92cu_deinit_sw_vars, @@ -148,6 +154,7 @@ static struct rtl_hal_ops rtl8192cu_hal_ops = { .phy_set_bw_mode_callback = rtl92cu_phy_set_bw_mode_callback, .dm_dynamic_txpower = rtl92cu_dm_dynamic_txpower, .fill_h2c_cmd = rtl92c_fill_h2c_cmd, + .get_btc_status = rtl92cu_get_btc_status, }; static struct rtl_mod_params rtl92cu_mod_params = { -- cgit v1.2.3-58-ga151 From cefe3dfdb9f5f498cae9871f7e52800f5e22c614 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Wed, 22 Oct 2014 15:47:33 +0200 Subject: rtl8192cu: Call ieee80211_register_hw from rtl_usb_probe In a previous patch the call to ieee80211_register_hw was moved from the load firmware callback to the rtl_pci_probe only. rt8192cu also uses this callback. Currently it doesnt create a wlan%d device. Fill in the call to ieee80211_register_hw in rtl_usb_probe. Signed-off-by: Karsten Wiese Acked-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/usb.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c index 10cf69c4bc42..46ee956d0235 100644 --- a/drivers/net/wireless/rtlwifi/usb.c +++ b/drivers/net/wireless/rtlwifi/usb.c @@ -1117,7 +1117,18 @@ int rtl_usb_probe(struct usb_interface *intf, } rtlpriv->cfg->ops->init_sw_leds(hw); + err = ieee80211_register_hw(hw); + if (err) { + RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, + "Can't register mac80211 hw.\n"); + err = -ENODEV; + goto error_out; + } + rtlpriv->mac80211.mac80211_registered = 1; + + set_bit(RTL_STATUS_INTERFACE_START, &rtlpriv->status); return 0; + error_out: rtl_deinit_core(hw); _rtl_usb_io_handler_release(hw); -- cgit v1.2.3-58-ga151 From 4f2b244c7d5b81ce4f0c6c0382f3a3b7c2dbec1c Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Wed, 22 Oct 2014 15:47:34 +0200 Subject: rtl8192cu: Prevent Ooops under rtl92c_set_fw_rsvdpagepkt rtl92c_set_fw_rsvdpagepkt is used by rtl8192cu and its pci sibling rtl8192ce. rtl_cmd_send_packet crashes when called inside rtl8192cu because it works on memory allocated only by rtl8192ce. Fix the crash by calling a dummy function when used in rtl8192cu. Comparision with the realtek vendor driver makes me think, something is missing in the dummy function. Short test as WPA2 station show good results connected to an 802.11g basestation. Traffic stops after few MBytes as WPA2 station connected to an 802.11n basestation. Signed-off-by: Karsten Wiese Acked-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c | 8 ++++++-- drivers/net/wireless/rtlwifi/rtl8192c/fw_common.h | 4 +++- drivers/net/wireless/rtlwifi/rtl8192ce/hw.c | 2 +- drivers/net/wireless/rtlwifi/rtl8192cu/hw.c | 17 ++++++++++++++++- drivers/net/wireless/rtlwifi/rtl8192cu/hw.h | 1 - 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c index a00861b26ece..29983bc96a89 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c +++ b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c @@ -656,7 +656,8 @@ static u8 reserved_page_packet[TOTAL_RESERVED_PKT_LEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; -void rtl92c_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) +void rtl92c_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, + bool (*cmd_send_packet)(struct ieee80211_hw *, struct sk_buff *)) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -722,7 +723,10 @@ void rtl92c_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) memcpy((u8 *)skb_put(skb, totalpacketlen), &reserved_page_packet, totalpacketlen); - rtstatus = rtl_cmd_send_packet(hw, skb); + if (cmd_send_packet) + rtstatus = cmd_send_packet(hw, skb); + else + rtstatus = rtl_cmd_send_packet(hw, skb); if (rtstatus) b_dlok = true; diff --git a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.h b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.h index a815bd6273da..b64ae45dc674 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.h +++ b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.h @@ -109,7 +109,9 @@ void rtl92c_fill_h2c_cmd(struct ieee80211_hw *hw, u8 element_id, u32 cmd_len, u8 *p_cmdbuffer); void rtl92c_firmware_selfreset(struct ieee80211_hw *hw); void rtl92c_set_fw_pwrmode_cmd(struct ieee80211_hw *hw, u8 mode); -void rtl92c_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished); +void rtl92c_set_fw_rsvdpagepkt + (struct ieee80211_hw *hw, + bool (*cmd_send_packet)(struct ieee80211_hw *, struct sk_buff *)); void rtl92c_set_fw_joinbss_report_cmd(struct ieee80211_hw *hw, u8 mstatus); void usb_writeN_async(struct rtl_priv *rtlpriv, u32 addr, void *data, u16 len); void rtl92c_set_p2p_ps_offload_cmd(struct ieee80211_hw *hw, u8 p2p_ps_state); diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c index 8ec0f031f48a..55357d69397a 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c @@ -459,7 +459,7 @@ void rtl92ce_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) rtl_write_byte(rtlpriv, REG_FWHW_TXQ_CTRL + 2, tmp_reg422 & (~BIT(6))); - rtl92c_set_fw_rsvdpagepkt(hw, 0); + rtl92c_set_fw_rsvdpagepkt(hw, NULL); _rtl92ce_set_bcn_ctrl_reg(hw, BIT(3), 0); _rtl92ce_set_bcn_ctrl_reg(hw, 0, BIT(4)); diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c index 04aa0b5f5b3d..873363acbacf 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c @@ -1592,6 +1592,20 @@ void rtl92cu_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } } +bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + /* Currently nothing happens here. + * Traffic stops after some seconds in WPA2 802.11n mode. + * Maybe because rtl8192cu chip should be set from here? + * If I understand correctly, the realtek vendor driver sends some urbs + * if its "here". + * + * This is maybe necessary: + * rtlpriv->cfg->ops->fill_tx_cmddesc(hw, buffer, 1, 1, skb); + */ + return true; +} + void rtl92cu_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) { struct rtl_priv *rtlpriv = rtl_priv(hw); @@ -1939,7 +1953,8 @@ void rtl92cu_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) recover = true; rtl_write_byte(rtlpriv, REG_FWHW_TXQ_CTRL + 2, tmp_reg422 & (~BIT(6))); - rtl92c_set_fw_rsvdpagepkt(hw, 0); + rtl92c_set_fw_rsvdpagepkt(hw, + &usb_cmd_send_packet); _rtl92cu_set_bcn_ctrl_reg(hw, BIT(3), 0); _rtl92cu_set_bcn_ctrl_reg(hw, 0, BIT(4)); if (recover) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.h b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.h index 0f7812e0c8aa..c1e33b0228c0 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.h +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.h @@ -104,7 +104,6 @@ bool rtl92cu_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 * valid); void rtl92cu_set_check_bssid(struct ieee80211_hw *hw, bool check_bssid); int rtl92c_download_fw(struct ieee80211_hw *hw); void rtl92c_set_fw_pwrmode_cmd(struct ieee80211_hw *hw, u8 mode); -void rtl92c_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool dl_finished); void rtl92c_set_fw_joinbss_report_cmd(struct ieee80211_hw *hw, u8 mstatus); void rtl92c_fill_h2c_cmd(struct ieee80211_hw *hw, u8 element_id, u32 cmd_len, u8 *p_cmdbuffer); -- cgit v1.2.3-58-ga151 From 763254516187015cb5b391553af35c6ed1f4bb36 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 22 Oct 2014 18:17:35 +0200 Subject: ath9k_common: always update value in ath9k_cmn_update_txpow In some cases the limit may be the same as reg->power_limit, but the actual value that the hardware uses is not up to date. In that case, a wrong value for current tx power is tracked internally. Fix this by unconditionally updating it. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index c6dd7f1fed65..33b0c7aef2ea 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -368,11 +368,11 @@ void ath9k_cmn_update_txpow(struct ath_hw *ah, u16 cur_txpow, { struct ath_regulatory *reg = ath9k_hw_regulatory(ah); - if (reg->power_limit != new_txpow) { + if (reg->power_limit != new_txpow) ath9k_hw_set_txpowerlimit(ah, new_txpow, false); - /* read back in case value is clamped */ - *txpower = reg->max_power_level; - } + + /* read back in case value is clamped */ + *txpower = reg->max_power_level; } EXPORT_SYMBOL(ath9k_cmn_update_txpow); -- cgit v1.2.3-58-ga151 From 08054200117a95afc14c3d2ed3a38bf4e345bf78 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 23 Oct 2014 11:27:09 -0500 Subject: rtlwifi: Add check for get_btc_status callback Drivers that do not use the get_btc_status() callback may not define a dummy routine. The caller needs to check before making the call. Signed-off-by: Larry Finger Cc: Murilo Opsfelder Araujo Cc: Mike Galbraith Cc: Thadeu Cascardo Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 667aba81246c..25daa8715219 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1796,7 +1796,8 @@ static int rtl_pci_start(struct ieee80211_hw *hw) rtl_pci_reset_trx_ring(hw); rtlpci->driver_is_goingto_unload = false; - if (rtlpriv->cfg->ops->get_btc_status()) { + if (rtlpriv->cfg->ops->get_btc_status && + rtlpriv->cfg->ops->get_btc_status()) { rtlpriv->btcoexist.btc_ops->btc_init_variables(rtlpriv); rtlpriv->btcoexist.btc_ops->btc_init_hal_vars(rtlpriv); } -- cgit v1.2.3-58-ga151 From 9180ac50716a097a407c6d7e7e4589754a922260 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 23 Sep 2014 23:02:41 +0300 Subject: iwlwifi: configure the LTR The LTR is the handshake between the device and the root complex about the latency allowed when the bus exits power save. This configuration was missing and this led to high latency in the link power up. The end user could experience high latency in the network because of this. Cc: [3.10+] Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-trans.h | 2 ++ drivers/net/wireless/iwlwifi/mvm/fw-api-power.h | 35 ++++++++++++++++++++++++- drivers/net/wireless/iwlwifi/mvm/fw-api.h | 1 + drivers/net/wireless/iwlwifi/mvm/fw.c | 9 +++++++ drivers/net/wireless/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/iwlwifi/pcie/trans.c | 16 ++++++----- 6 files changed, 56 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h index 9eb85249e89c..d8fc548c0d6c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/iwlwifi/iwl-trans.h @@ -563,6 +563,7 @@ enum iwl_trans_state { * Set during transport allocation. * @hw_id_str: a string with info about HW ID. Set during transport allocation. * @pm_support: set to true in start_hw if link pm is supported + * @ltr_enabled: set to true if the LTR is enabled * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only. * The user should use iwl_trans_{alloc,free}_tx_cmd. * @dev_cmd_headroom: room needed for the transport's private use before the @@ -589,6 +590,7 @@ struct iwl_trans { u8 rx_mpdu_cmd, rx_mpdu_cmd_hdr_size; bool pm_support; + bool ltr_enabled; /* The following fields are internal only */ struct kmem_cache *dev_cmd_pool; diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h index 27dd86395b39..2fd8ad4633e0 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h @@ -68,13 +68,46 @@ /* Power Management Commands, Responses, Notifications */ +/** + * enum iwl_ltr_config_flags - masks for LTR config command flags + * @LTR_CFG_FLAG_FEATURE_ENABLE: Feature operational status + * @LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS: allow LTR change on shadow + * memory access + * @LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH: allow LTR msg send on ANY LTR + * reg change + * @LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3: allow LTR msg send on transition from + * D0 to D3 + * @LTR_CFG_FLAG_SW_SET_SHORT: fixed static short LTR register + * @LTR_CFG_FLAG_SW_SET_LONG: fixed static short LONG register + * @LTR_CFG_FLAG_DENIE_C10_ON_PD: allow going into C10 on PD + */ +enum iwl_ltr_config_flags { + LTR_CFG_FLAG_FEATURE_ENABLE = BIT(0), + LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS = BIT(1), + LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH = BIT(2), + LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3 = BIT(3), + LTR_CFG_FLAG_SW_SET_SHORT = BIT(4), + LTR_CFG_FLAG_SW_SET_LONG = BIT(5), + LTR_CFG_FLAG_DENIE_C10_ON_PD = BIT(6), +}; + +/** + * struct iwl_ltr_config_cmd - configures the LTR + * @flags: See %enum iwl_ltr_config_flags + */ +struct iwl_ltr_config_cmd { + __le32 flags; + __le32 static_long; + __le32 static_short; +} __packed; + /* Radio LP RX Energy Threshold measured in dBm */ #define POWER_LPRX_RSSI_THRESHOLD 75 #define POWER_LPRX_RSSI_THRESHOLD_MAX 94 #define POWER_LPRX_RSSI_THRESHOLD_MIN 30 /** - * enum iwl_scan_flags - masks for power table command flags + * enum iwl_power_flags - masks for power table command flags * @POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off * receiver and transmitter. '0' - does not allow. * @POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management, diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h index 667a92274c87..c62575d86bcd 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h @@ -157,6 +157,7 @@ enum { /* Power - legacy power table command */ POWER_TABLE_CMD = 0x77, PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION = 0x78, + LTR_CONFIG = 0xee, /* Thermal Throttling*/ REPLY_THERMAL_MNG_BACKOFF = 0x7e, diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index 23fd711a67e4..e0d9f19650b0 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -480,6 +480,15 @@ int iwl_mvm_up(struct iwl_mvm *mvm) /* Initialize tx backoffs to the minimal possible */ iwl_mvm_tt_tx_backoff(mvm, 0); + if (mvm->trans->ltr_enabled) { + struct iwl_ltr_config_cmd cmd = { + .flags = cpu_to_le32(LTR_CFG_FLAG_FEATURE_ENABLE), + }; + + WARN_ON(iwl_mvm_send_cmd_pdu(mvm, LTR_CONFIG, 0, + sizeof(cmd), &cmd)); + } + ret = iwl_mvm_power_update_device(mvm); if (ret) goto error; diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 15aa298ee79c..48cb25a93591 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -336,6 +336,7 @@ static const char *const iwl_mvm_cmd_strings[REPLY_MAX] = { CMD(DTS_MEASUREMENT_NOTIFICATION), CMD(REPLY_THERMAL_MNG_BACKOFF), CMD(MAC_PM_POWER_TABLE), + CMD(LTR_CONFIG), CMD(BT_COEX_CI), CMD(BT_COEX_UPDATE_SW_BOOST), CMD(BT_COEX_UPDATE_CORUN_LUT), diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 1393bac0025c..c706dba67cdd 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -174,6 +174,7 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u16 lctl; + u16 cap; /* * HW bug W/A for instability in PCIe bus L0S->L1 transition. @@ -184,16 +185,17 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans) * power savings, even without L1. */ pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); - if (lctl & PCI_EXP_LNKCTL_ASPM_L1) { - /* L1-ASPM enabled; disable(!) L0S */ + if (lctl & PCI_EXP_LNKCTL_ASPM_L1) iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); - dev_info(trans->dev, "L1 Enabled; Disabling L0S\n"); - } else { - /* L1-ASPM disabled; enable(!) L0S */ + else iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); - dev_info(trans->dev, "L1 Disabled; Enabling L0S\n"); - } trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); + + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); + trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN; + dev_info(trans->dev, "L1 %sabled - LTR %sabled\n", + (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis", + trans->ltr_enabled ? "En" : "Dis"); } /* -- cgit v1.2.3-58-ga151 From 405b7338abc5ceac4a420ce7f49cc9b530d4e78b Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 23 Sep 2014 15:15:17 +0300 Subject: iwlwifi: 8000: fix string given to MODULE_FIRMWARE I changed the string but forgot to update the fix also to MODULE_FIRMWARE(). Signed-off-by: Liad Kaufman Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-8000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-8000.c b/drivers/net/wireless/iwlwifi/iwl-8000.c index e4351487ca72..d2b7234b1c73 100644 --- a/drivers/net/wireless/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/iwlwifi/iwl-8000.c @@ -82,7 +82,8 @@ #define IWL8000_TX_POWER_VERSION 0xffff /* meaningless */ #define IWL8000_FW_PRE "iwlwifi-8000" -#define IWL8000_MODULE_FIRMWARE(api) IWL8000_FW_PRE __stringify(api) ".ucode" +#define IWL8000_MODULE_FIRMWARE(api) \ + IWL8000_FW_PRE "-" __stringify(api) ".ucode" #define NVM_HW_SECTION_NUM_FAMILY_8000 10 #define DEFAULT_NVM_FILE_FAMILY_8000 "iwl_nvm_8000.bin" -- cgit v1.2.3-58-ga151 From d14b28fd2c61af0bf310230472e342864d799c98 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 22 Sep 2014 16:12:24 +0300 Subject: iwlwifi: mvm: BT Coex - update the MPLUT Boost register value Cc: [3.16+] Fixes: 2adc8949efab ("iwlwifi: mvm: BT Coex - fix boost register / LUT values") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/coex.c | 4 ++-- drivers/net/wireless/iwlwifi/mvm/coex_legacy.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/coex.c b/drivers/net/wireless/iwlwifi/mvm/coex.c index 8df2021f9856..da2ffb785194 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex.c @@ -303,8 +303,8 @@ static const __le64 iwl_ci_mask[][3] = { }; static const __le32 iwl_bt_mprio_lut[BT_COEX_MULTI_PRIO_LUT_SIZE] = { - cpu_to_le32(0x28412201), - cpu_to_le32(0x11118451), + cpu_to_le32(0x2e402280), + cpu_to_le32(0x7711a751), }; struct corunning_block_luts { diff --git a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c index 585c0ab4a3ec..8a1d2f33d5b7 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c @@ -291,8 +291,8 @@ static const __le64 iwl_ci_mask[][3] = { }; static const __le32 iwl_bt_mprio_lut[BT_COEX_MULTI_PRIO_LUT_SIZE] = { - cpu_to_le32(0x28412201), - cpu_to_le32(0x11118451), + cpu_to_le32(0x2e402280), + cpu_to_le32(0x7711a751), }; struct corunning_block_luts { -- cgit v1.2.3-58-ga151 From 3856b78c1be32a2afe0618c7a84e05ff8c03cf10 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 22 Sep 2014 12:03:41 +0300 Subject: iwlwifi: mvm: BT coex - fix BT prio for probe requests The probe requests sent during scan must get BT prio 3. Fix that. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index cb85e63c20aa..b280d5d87127 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -459,7 +459,8 @@ int iwl_mvm_scan_request(struct iwl_mvm *mvm, basic_ssid ? 1 : 0); cmd->tx_cmd.tx_flags = cpu_to_le32(TX_CMD_FLG_SEQ_CTL | - TX_CMD_FLG_BT_DIS); + 3 << TX_CMD_FLG_BT_PRIO_POS); + cmd->tx_cmd.sta_id = mvm->aux_sta.sta_id; cmd->tx_cmd.life_time = cpu_to_le32(TX_CMD_LIFE_TIME_INFINITE); cmd->tx_cmd.rate_n_flags = -- cgit v1.2.3-58-ga151 From 79b7a69d730180d8bf535e52fe2b4f3dd5904007 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 14 Sep 2014 12:40:00 +0300 Subject: iwlwifi: mvm: Add tx power condition to bss_info_changed_ap_ibss The tx power should be limited from many reasons. currently, setting the tx power is available by the mvm only for station interface. Adding the tx power condition to bss_info_changed_ap_ibss make it available also for AP. Signed-off-by: Haim Dreyfuss Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index c7a73c68bdab..892a6bc26686 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1734,6 +1734,13 @@ iwl_mvm_bss_info_changed_ap_ibss(struct iwl_mvm *mvm, if (changes & BSS_CHANGED_BEACON && iwl_mvm_mac_ctxt_beacon_changed(mvm, vif)) IWL_WARN(mvm, "Failed updating beacon data\n"); + + if (changes & BSS_CHANGED_TXPOWER) { + IWL_DEBUG_CALIB(mvm, "Changing TX Power to %d\n", + bss_conf->txpower); + iwl_mvm_set_tx_power(mvm, vif, bss_conf->txpower); + } + } static void iwl_mvm_bss_info_changed(struct ieee80211_hw *hw, -- cgit v1.2.3-58-ga151 From a6cc5163149532734b84c86cbffa4994e527074b Mon Sep 17 00:00:00 2001 From: Matti Gottlieb Date: Mon, 29 Sep 2014 11:46:04 +0300 Subject: iwlwifi: mvm: ROC - bug fixes around time events and locking Don't add the time event to the list. We added it several times the same time event, which leads to an infinite loop when walking the list. Since we (currently) don't support more than one ROC for STA vif at a time, enforce this and don't add the time event to any list. We were also missing the locking of the mutex which led to a lockdep splat - fix that. Signed-off-by: Matti Gottlieb Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 25 ++++++++++++++++--------- drivers/net/wireless/iwlwifi/mvm/time-event.c | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 892a6bc26686..585fe5b7100f 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -526,7 +526,8 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, } if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && - !test_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status)) + !test_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status) && + !test_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) goto drop; /* treat non-bufferable MMPDUs as broadcast if sta is sleeping */ @@ -2374,14 +2375,19 @@ static int iwl_mvm_send_aux_roc_cmd(struct iwl_mvm *mvm, /* Set the node address */ memcpy(aux_roc_req.node_addr, vif->addr, ETH_ALEN); + lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->time_event_lock); + + if (WARN_ON(te_data->id == HOT_SPOT_CMD)) { + spin_unlock_bh(&mvm->time_event_lock); + return -EIO; + } + te_data->vif = vif; te_data->duration = duration; te_data->id = HOT_SPOT_CMD; - lockdep_assert_held(&mvm->mutex); - - spin_lock_bh(&mvm->time_event_lock); - list_add_tail(&te_data->list, &mvm->time_event_list); spin_unlock_bh(&mvm->time_event_lock); /* @@ -2437,22 +2443,23 @@ static int iwl_mvm_roc(struct ieee80211_hw *hw, IWL_DEBUG_MAC80211(mvm, "enter (%d, %d, %d)\n", channel->hw_value, duration, type); + mutex_lock(&mvm->mutex); + switch (vif->type) { case NL80211_IFTYPE_STATION: /* Use aux roc framework (HS20) */ ret = iwl_mvm_send_aux_roc_cmd(mvm, channel, vif, duration); - return ret; + goto out_unlock; case NL80211_IFTYPE_P2P_DEVICE: /* handle below */ break; default: IWL_ERR(mvm, "vif isn't P2P_DEVICE: %d\n", vif->type); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } - mutex_lock(&mvm->mutex); - for (i = 0; i < NUM_PHY_CTX; i++) { phy_ctxt = &mvm->phy_ctxts[i]; if (phy_ctxt->ref == 0 || mvmvif->phy_ctxt == phy_ctxt) diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c index b7f9e61d14e2..6dfad230be5e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c @@ -305,8 +305,8 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, te_data->running = false; te_data->vif = NULL; te_data->uid = 0; + te_data->id = TE_MAX; } else if (le32_to_cpu(notif->action) == TE_V2_NOTIF_HOST_EVENT_START) { - set_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status); set_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); te_data->running = true; ieee80211_ready_on_channel(mvm->hw); /* Start TE */ -- cgit v1.2.3-58-ga151 From a0855054e59b0c5b2b00237fdb5147f7bcc18efb Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 5 Oct 2014 09:11:14 +0300 Subject: iwlwifi: dvm: drop non VO frames when flushing When mac80211 wants to ensure that a frame is sent, it calls the flush() callback. Until now, iwldvm implemented this by waiting that all the frames are sent (ACKed or timeout). In case of weak signal, this can take a significant amount of time, delaying the next connection (in case of roaming). Many users have reported that the flush would take too long leading to the following error messages to be printed: iwlwifi 0000:03:00.0: fail to flush all tx fifo queues Q 2 iwlwifi 0000:03:00.0: Current SW read_ptr 161 write_ptr 201 iwl data: 00000000: 00 00 00 00 00 00 00 00 fe ff 01 00 00 00 00 00 [snip] iwlwifi 0000:03:00.0: FH TRBs(0) = 0x00000000 [snip] iwlwifi 0000:03:00.0: Q 0 is active and mapped to fifo 3 ra_tid 0x0000 [9,9] [snip] Instead of waiting for these packets, simply drop them. This significantly improves the responsiveness of the network. Note that all the queues are flushed, but the VO one. This is not typically used by the applications and it likely contains management frames that are useful for connection or roaming. This bug is tracked here: https://bugzilla.kernel.org/show_bug.cgi?id=56581 But it is duplicated in distributions' trackers. A simple search in Ubuntu's database led to these bugs: https://bugs.launchpad.net/ubuntu/+source/linux-firmware/+bug/1270808 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1305406 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1356236 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1360597 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1361809 Cc: Depends-on: 77be2c54c5bd ("mac80211: add vif to flush call") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 2364a3c09b9e..cae692ff1013 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1095,6 +1095,7 @@ static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); + u32 scd_queues; mutex_lock(&priv->mutex); IWL_DEBUG_MAC80211(priv, "enter\n"); @@ -1108,18 +1109,19 @@ static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, goto done; } - /* - * mac80211 will not push any more frames for transmit - * until the flush is completed - */ - if (drop) { - IWL_DEBUG_MAC80211(priv, "send flush command\n"); - if (iwlagn_txfifo_flush(priv, 0)) { - IWL_ERR(priv, "flush request fail\n"); - goto done; - } + scd_queues = BIT(priv->cfg->base_params->num_of_queues) - 1; + scd_queues &= ~(BIT(IWL_IPAN_CMD_QUEUE_NUM) | + BIT(IWL_DEFAULT_CMD_QUEUE_NUM)); + + if (vif) + scd_queues &= ~BIT(vif->hw_queue[IEEE80211_AC_VO]); + + IWL_DEBUG_TX_QUEUES(priv, "Flushing SCD queues: 0x%x\n", scd_queues); + if (iwlagn_txfifo_flush(priv, scd_queues)) { + IWL_ERR(priv, "flush request fail\n"); + goto done; } - IWL_DEBUG_MAC80211(priv, "wait transmit/flush all frames\n"); + IWL_DEBUG_TX_QUEUES(priv, "wait transmit/flush all frames\n"); iwl_trans_wait_tx_queue_empty(priv->trans, 0xffffffff); done: mutex_unlock(&priv->mutex); -- cgit v1.2.3-58-ga151 From 1ffde699aae127e7abdb98dbdedc2cc6a973a1a1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Oct 2014 08:29:55 +0300 Subject: Revert "iwlwifi: mvm: treat EAPOLs like mgmt frames wrt rate" This reverts commit aa11bbf3df026d6b1c6b528bef634fd9de7c2619. This commit was causing connection issues and is not needed if IWL_MVM_RS_RSSI_BASED_INIT_RATE is set to false by default. Regardless of the issues mentioned above, this patch added the following WARNING: WARNING: CPU: 0 PID: 3946 at drivers/net/wireless/iwlwifi/mvm/tx.c:190 iwl_mvm_set_tx_params+0x60a/0x6f0 [iwlmvm]() Got an HT rate for a non data frame 0x8 CPU: 0 PID: 3946 Comm: wpa_supplicant Tainted: G O 3.17.0+ #6 Hardware name: LENOVO 20ANCTO1WW/20ANCTO1WW, BIOS GLET71WW (2.25 ) 07/02/2014 0000000000000009 ffffffff814fa911 ffff8804288db8f8 ffffffff81064f52 0000000000001808 ffff8804288db948 ffff88040add8660 ffff8804291b5600 0000000000000000 ffffffff81064fb7 ffffffffa07b73d0 0000000000000020 Call Trace: [] ? dump_stack+0x41/0x51 [] ? warn_slowpath_common+0x72/0x90 [] ? warn_slowpath_fmt+0x47/0x50 [] ? iwl_mvm_set_tx_params+0x60a/0x6f0 [iwlmvm] [] ? iwl_mvm_tx_skb+0x48/0x3c0 [iwlmvm] [] ? iwl_mvm_mac_tx+0x7b/0x180 [iwlmvm] [] ? __ieee80211_tx+0x2b9/0x3c0 [mac80211] [] ? ieee80211_tx+0xb3/0x100 [mac80211] [] ? ieee80211_subif_start_xmit+0x459/0xca0 [mac80211] [] ? dev_hard_start_xmit+0x337/0x5f0 [] ? sch_direct_xmit+0x96/0x1f0 [] ? __dev_queue_xmit+0x203/0x4f0 [] ? ether_setup+0x70/0x70 [] ? packet_sendmsg+0xf81/0x1110 [] ? skb_free_datagram+0xc/0x40 [] ? sock_sendmsg+0x88/0xc0 [] ? move_addr_to_kernel.part.20+0x14/0x60 [] ? __inode_wait_for_writeback+0x62/0xb0 [] ? SYSC_sendto+0xf1/0x180 [] ? __sys_recvmsg+0x39/0x70 [] ? system_call_fastpath+0x1a/0x1f ---[ end trace cc19a150d311fc63 ]--- which was reported here: https://bugzilla.kernel.org/show_bug.cgi?id=85691 CC: [3.13+] Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/tx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 1cb793a498ac..c6a517c771df 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -175,14 +175,10 @@ static void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, /* * for data packets, rate info comes from the table inside the fw. This - * table is controlled by LINK_QUALITY commands. Exclude ctrl port - * frames like EAPOLs which should be treated as mgmt frames. This - * avoids them being sent initially in high rates which increases the - * chances for completion of the 4-Way handshake. + * table is controlled by LINK_QUALITY commands */ - if (ieee80211_is_data(fc) && sta && - !(info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) { + if (ieee80211_is_data(fc) && sta) { tx_cmd->initial_rate_index = 0; tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_STA_RATE); return; -- cgit v1.2.3-58-ga151 From 7f2ac8fb31896c9fb70dbd2a2e6642b79996fc13 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 23 Oct 2014 08:53:21 +0300 Subject: iwlwifi: pcie: fix polling in various places iwl_poll_bit may return a strictly positive value when the poll doesn't match on the first try. This was caught when WoWLAN started failing upon resume even if the poll_bit actually succeeded. Also change a wrong print. If we reach the end of iwl_pcie_prepare_card_hw, it means that we couldn't get the devices. Reviewed-by: Johannes Berg Reviewed-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index c706dba67cdd..3781b029e54a 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -430,7 +430,7 @@ static int iwl_pcie_apm_stop_master(struct iwl_trans *trans) ret = iwl_poll_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_MASTER_DISABLED, CSR_RESET_REG_FLAG_MASTER_DISABLED, 100); - if (ret) + if (ret < 0) IWL_WARN(trans, "Master Disable Timed Out, 100 usec\n"); IWL_DEBUG_INFO(trans, "stop master\n"); @@ -546,7 +546,7 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) msleep(25); } - IWL_DEBUG_INFO(trans, "got NIC after %d iterations\n", iter); + IWL_ERR(trans, "Couldn't prepare the card\n"); return ret; } @@ -1045,7 +1045,7 @@ static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000); - if (ret) { + if (ret < 0) { IWL_ERR(trans, "Failed to resume the device (mac ready)\n"); return ret; } -- cgit v1.2.3-58-ga151 From 7677e86843e2136a9b05549a9ca47d4f744565b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 Oct 2014 22:18:02 +0800 Subject: bridge: Do not compile options in br_parse_ip_options Commit 462fb2af9788a82a534f8184abfde31574e1cfa0 bridge : Sanitize skb before it enters the IP stack broke when IP options are actually used because it mangles the skb as if it entered the IP stack which is wrong because the bridge is supposed to operate below the IP stack. Since nobody has actually requested for parsing of IP options this patch fixes it by simply reverting to the previous approach of ignoring all IP options, i.e., zeroing the IPCB. If and when somebody who uses IP options and actually needs them to be parsed by the bridge complains then we can revisit this. Reported-by: David Newall Signed-off-by: Herbert Xu Tested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1bada53bb195..1a4f32c09ad5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -192,7 +192,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) static int br_parse_ip_options(struct sk_buff *skb) { - struct ip_options *opt; const struct iphdr *iph; struct net_device *dev = skb->dev; u32 len; @@ -201,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb) goto inhdr_error; iph = ip_hdr(skb); - opt = &(IPCB(skb)->opt); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) @@ -227,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb) } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (iph->ihl == 5) - return 0; - - opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(dev_net(dev), opt, skb)) - goto inhdr_error; - - /* Check correct handling of SRR option */ - if (unlikely(opt->srr)) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) - goto drop; - - if (ip_options_rcv_srr(skb)) - goto drop; - } - + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ return 0; inhdr_error: -- cgit v1.2.3-58-ga151 From 9dfa1dfe4d5e5e66a991321ab08afe69759d797a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Oct 2014 10:36:06 +0200 Subject: netfilter: nf_log: account for size of NLMSG_DONE attribute We currently neither account for the nlattr size, nor do we consider the size of the trailing NLMSG_DONE when allocating nlmsg skb. This can result in nflog to stop working, as __nfulnl_send() re-tries sending forever if it failed to append NLMSG_DONE (which will never work if buffer is not large enough). Reported-by: Houcheng Lin Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1e3a0579416..8117fba8e661 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -649,7 +649,8 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) @@ -692,8 +693,7 @@ nfulnl_log_packet(struct net *net, goto unlock_and_release; } - if (inst->skb && - size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { + if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ __nfulnl_flush(inst); -- cgit v1.2.3-58-ga151 From c1e7dc91eed0ed1a51c9b814d648db18bf8fc6e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Oct 2014 10:36:07 +0200 Subject: netfilter: nfnetlink_log: fix maximum packet length logged to userspace don't try to queue payloads > 0xffff - NLA_HDRLEN, it does not work. The nla length includes the size of the nla struct, so anything larger results in u16 integer overflow. This patch is similar to 9cefbbc9c8f9abe (netfilter: nfnetlink_queue: cleanup copy_range usage). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8117fba8e661..2d02eac35415 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -43,7 +43,8 @@ #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ -#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ +/* max packet size is limited by 16-bit struct nfattr nfa_len field */ +#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; + if (range == 0) + range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; @@ -679,8 +682,7 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range == 0 - || inst->copy_range > skb->len) + if (inst->copy_range > skb->len) data_len = skb->len; else data_len = inst->copy_range; -- cgit v1.2.3-58-ga151 From b51d3fa364885a2c1e1668f88776c67c95291820 Mon Sep 17 00:00:00 2001 From: Houcheng Lin Date: Thu, 23 Oct 2014 10:36:08 +0200 Subject: netfilter: nf_log: release skbuff on nlmsg put failure The kernel should reserve enough room in the skb so that the DONE message can always be appended. However, in case of e.g. new attribute erronously not being size-accounted for, __nfulnl_send() will still try to put next nlmsg into this full skbuf, causing the skb to be stuck forever and blocking delivery of further messages. Fix issue by releasing skb immediately after nlmsg_put error and WARN() so we can track down the cause of such size mismatch. [ fw@strlen.de: add tailroom/len info to WARN ] Signed-off-by: Houcheng Lin Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2d02eac35415..5f1be5ba3559 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -346,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, return skb; } -static int +static void __nfulnl_send(struct nfulnl_instance *inst) { - int status = -1; - if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); - if (!nlh) + if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", + inst->skb->len, skb_tailroom(inst->skb))) { + kfree_skb(inst->skb); goto out; + } } - status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); - + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, + MSG_DONTWAIT); +out: inst->qlen = 0; inst->skb = NULL; -out: - return status; } static void -- cgit v1.2.3-58-ga151 From 5345c1d417c1b0caf46fd2766d16bb4357a347d8 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 22 Oct 2014 21:35:15 +0200 Subject: ptp: restore the makefile for building the test program. This patch brings back the makefile called testptp.mk which was removed in commit adb19fb66eee (Documentation: add makefiles for more targets). While the idea of that commit was to improve build coverage of the examples, the new Makefile is unable to cross compile the testptp program. In contrast, the deleted makefile was able to do this just fine. This patch fixes the regression by restoring the original makefile. Signed-off-by: Richard Cochran Acked-by: Peter Foley Signed-off-by: David S. Miller --- Documentation/ptp/testptp.mk | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/ptp/testptp.mk diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) -- cgit v1.2.3-58-ga151 From bc96f648df1bbc2729abbb84513cf4f64273a1f1 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 22 Oct 2014 14:08:53 +0100 Subject: xen-netback: make feature-rx-notify mandatory Frontends that do not provide feature-rx-notify may stall because netback depends on the notification from frontend to wake the guest Rx thread (even if can_queue is false). This could be fixed but feature-rx-notify was introduced in 2006 and I am not aware of any frontends that do not implement this. Signed-off-by: David Vrabel Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 5 ----- drivers/net/xen-netback/interface.c | 12 +----------- drivers/net/xen-netback/xenbus.c | 13 ++++--------- 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index d4eb8d2e9cb7..93ca77c129c3 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -228,9 +228,6 @@ struct xenvif { u8 ip_csum:1; u8 ipv6_csum:1; - /* Internal feature information. */ - u8 can_queue:1; /* can queue packets for receiver? */ - /* Is this interface disabled? True when backend discovers * frontend is rogue. */ @@ -272,8 +269,6 @@ void xenvif_xenbus_fini(void); int xenvif_schedulable(struct xenvif *vif); -int xenvif_must_stop_queue(struct xenvif_queue *queue); - int xenvif_queue_stopped(struct xenvif_queue *queue); void xenvif_wake_queue(struct xenvif_queue *queue); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f379689dde30..c6759b1ec18d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -60,16 +60,6 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) atomic_dec(&queue->inflight_packets); } -static inline void xenvif_stop_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - - if (!queue->vif->can_queue) - return; - - netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); -} - int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && @@ -209,7 +199,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) { queue->rx_stalled.function = xenvif_rx_stalled; queue->rx_stalled.data = (unsigned long)queue; - xenvif_stop_queue(queue); + netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); mod_timer(&queue->rx_stalled, jiffies + rx_drain_timeout_jiffies); } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 8079c31ac5e6..9060857c9022 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -873,15 +873,10 @@ static int read_xenbus_vif_flags(struct backend_info *be) if (!rx_copy) return -EOPNOTSUPP; - if (vif->dev->tx_queue_len != 0) { - if (xenbus_scanf(XBT_NIL, dev->otherend, - "feature-rx-notify", "%d", &val) < 0) - val = 0; - if (val) - vif->can_queue = 1; - else - /* Must be non-zero for pfifo_fast to work. */ - vif->dev->tx_queue_len = 1; + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-rx-notify", "%d", &val) < 0 || val == 0) { + xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory"); + return -EINVAL; } if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", -- cgit v1.2.3-58-ga151 From f48da8b14d04ca87ffcffe68829afd45f926ec6a Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 22 Oct 2014 14:08:54 +0100 Subject: xen-netback: fix unlimited guest Rx internal queue and carrier flapping Netback needs to discard old to-guest skb's (guest Rx queue drain) and it needs detect guest Rx stalls (to disable the carrier so packets are discarded earlier), but the current implementation is very broken. 1. The check in hard_start_xmit of the slot availability did not consider the number of packets that were already in the guest Rx queue. This could allow the queue to grow without bound. The guest stops consuming packets and the ring was allowed to fill leaving S slot free. Netback queues a packet requiring more than S slots (ensuring that the ring stays with S slots free). Netback queue indefinately packets provided that then require S or fewer slots. 2. The Rx stall detection is not triggered in this case since the (host) Tx queue is not stopped. 3. If the Tx queue is stopped and a guest Rx interrupt occurs, netback will consider this an Rx purge event which may result in it taking the carrier down unnecessarily. It also considers a queue with only 1 slot free as unstalled (even though the next packet might not fit in this). The internal guest Rx queue is limited by a byte length (to 512 Kib, enough for half the ring). The (host) Tx queue is stopped and started based on this limit. This sets an upper bound on the amount of memory used by packets on the internal queue. This allows the estimatation of the number of slots for an skb to be removed (it wasn't a very good estimate anyway). Instead, the guest Rx thread just waits for enough free slots for a maximum sized packet. skbs queued on the internal queue have an 'expires' time (set to the current time plus the drain timeout). The guest Rx thread will detect when the skb at the head of the queue has expired and discard expired skbs. This sets a clear upper bound on the length of time an skb can be queued for. For a guest being destroyed the maximum time needed to wait for all the packets it sent to be dropped is still the drain timeout (10 s) since it will not be sending new packets. Rx stall detection is reintroduced in a later commit. Signed-off-by: David Vrabel Reviewed-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 29 +++-- drivers/net/xen-netback/interface.c | 59 ++------- drivers/net/xen-netback/netback.c | 243 +++++++++++++++++++----------------- drivers/net/xen-netback/xenbus.c | 8 ++ 4 files changed, 161 insertions(+), 178 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 93ca77c129c3..c2642402b7a1 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -176,10 +176,9 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */ struct xen_netif_rx_back_ring rx; struct sk_buff_head rx_queue; - RING_IDX rx_last_skb_slots; - unsigned long status; - struct timer_list rx_stalled; + unsigned int rx_queue_max; + unsigned int rx_queue_len; struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; @@ -199,18 +198,14 @@ struct xenvif_queue { /* Per-queue data for xenvif */ struct xenvif_stats stats; }; +/* Maximum number of Rx slots a to-guest packet may use, including the + * slot needed for GSO meta-data. + */ +#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1) + enum state_bit_shift { /* This bit marks that the vif is connected */ VIF_STATUS_CONNECTED, - /* This bit signals the RX thread that queuing was stopped (in - * start_xmit), and either the timer fired or an RX interrupt came - */ - QUEUE_STATUS_RX_PURGE_EVENT, - /* This bit tells the interrupt handler that this queue was the reason - * for the carrier off, so it should kick the thread. Only queues which - * brought it down can turn on the carrier. - */ - QUEUE_STATUS_RX_STALLED }; struct xenvif { @@ -246,6 +241,14 @@ struct xenvif { struct net_device *dev; }; +struct xenvif_rx_cb { + unsigned long expires; + int meta_slots_used; + bool full_coalesce; +}; + +#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) + static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) { return to_xenbus_device(vif->dev->dev.parent); @@ -291,6 +294,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + /* Determine whether the needed number of slots (req) are available, * and set req_event if not. */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index c6759b1ec18d..a134d52f55b4 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -43,6 +43,9 @@ #define XENVIF_QUEUE_LENGTH 32 #define XENVIF_NAPI_WEIGHT 64 +/* Number of bytes allowed on the internal guest Rx queue. */ +#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE) + /* This function is used to set SKBTX_DEV_ZEROCOPY as well as * increasing the inflight counter. We need to increase the inflight * counter because core driver calls into xenvif_zerocopy_callback @@ -63,7 +66,8 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && - test_bit(VIF_STATUS_CONNECTED, &vif->status); + test_bit(VIF_STATUS_CONNECTED, &vif->status) && + !vif->disabled; } static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) @@ -104,16 +108,7 @@ int xenvif_poll(struct napi_struct *napi, int budget) static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; - struct netdev_queue *net_queue = - netdev_get_tx_queue(queue->vif->dev, queue->id); - /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR - * the carrier went down and this queue was previously blocked - */ - if (unlikely(netif_tx_queue_stopped(net_queue) || - (!netif_carrier_ok(queue->vif->dev) && - test_bit(QUEUE_STATUS_RX_STALLED, &queue->status)))) - set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); xenvif_kick_thread(queue); return IRQ_HANDLED; @@ -141,24 +136,13 @@ void xenvif_wake_queue(struct xenvif_queue *queue) netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); } -/* Callback to wake the queue's thread and turn the carrier off on timeout */ -static void xenvif_rx_stalled(unsigned long data) -{ - struct xenvif_queue *queue = (struct xenvif_queue *)data; - - if (xenvif_queue_stopped(queue)) { - set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status); - xenvif_kick_thread(queue); - } -} - static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; unsigned int num_queues = vif->num_queues; u16 index; - int min_slots_needed; + struct xenvif_rx_cb *cb; BUG_ON(skb->dev != dev); @@ -181,30 +165,10 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) !xenvif_schedulable(vif)) goto drop; - /* At best we'll need one slot for the header and one for each - * frag. - */ - min_slots_needed = 1 + skb_shinfo(skb)->nr_frags; + cb = XENVIF_RX_CB(skb); + cb->expires = jiffies + rx_drain_timeout_jiffies; - /* If the skb is GSO then we'll also need an extra slot for the - * metadata. - */ - if (skb_is_gso(skb)) - min_slots_needed++; - - /* If the skb can't possibly fit in the remaining slots - * then turn off the queue to give the ring a chance to - * drain. - */ - if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) { - queue->rx_stalled.function = xenvif_rx_stalled; - queue->rx_stalled.data = (unsigned long)queue; - netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - mod_timer(&queue->rx_stalled, - jiffies + rx_drain_timeout_jiffies); - } - - skb_queue_tail(&queue->rx_queue, skb); + xenvif_rx_queue_tail(queue, skb); xenvif_kick_thread(queue); return NETDEV_TX_OK; @@ -498,6 +462,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) init_timer(&queue->credit_timeout); queue->credit_window_start = get_jiffies_64(); + queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES; + skb_queue_head_init(&queue->rx_queue); skb_queue_head_init(&queue->tx_queue); @@ -529,8 +495,6 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; } - init_timer(&queue->rx_stalled); - return 0; } @@ -664,7 +628,6 @@ void xenvif_disconnect(struct xenvif *vif) netif_napi_del(&queue->napi); if (queue->task) { - del_timer_sync(&queue->rx_stalled); kthread_stop(queue->task); queue->task = NULL; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 08f65996534c..57aa3b507d32 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -55,8 +55,8 @@ bool separate_tx_rx_irq = 1; module_param(separate_tx_rx_irq, bool, 0644); -/* When guest ring is filled up, qdisc queues the packets for us, but we have - * to timeout them, otherwise other guests' packets can get stuck there +/* The time that packets can stay on the guest Rx internal queue + * before they are dropped. */ unsigned int rx_drain_timeout_msecs = 10000; module_param(rx_drain_timeout_msecs, uint, 0444); @@ -83,7 +83,6 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static inline int tx_work_todo(struct xenvif_queue *queue); -static inline int rx_work_todo(struct xenvif_queue *queue); static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue, u16 id, @@ -163,6 +162,69 @@ bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed) return false; } +void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; + if (queue->rx_queue_len > queue->rx_queue_max) + netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); + + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +} + +static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + spin_lock_irq(&queue->rx_queue.lock); + + skb = __skb_dequeue(&queue->rx_queue); + if (skb) + queue->rx_queue_len -= skb->len; + + spin_unlock_irq(&queue->rx_queue.lock); + + return skb; +} + +static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) +{ + spin_lock_irq(&queue->rx_queue.lock); + + if (queue->rx_queue_len < queue->rx_queue_max) + netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id)); + + spin_unlock_irq(&queue->rx_queue.lock); +} + + +static void xenvif_rx_queue_purge(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + while ((skb = xenvif_rx_dequeue(queue)) != NULL) + kfree_skb(skb); +} + +static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) +{ + struct sk_buff *skb; + + for(;;) { + skb = skb_peek(&queue->rx_queue); + if (!skb) + break; + if (time_before(jiffies, XENVIF_RX_CB(skb)->expires)) + break; + xenvif_rx_dequeue(queue); + kfree_skb(skb); + } +} + /* * Returns true if we should start a new receive buffer instead of * adding 'size' bytes to a buffer which currently contains 'offset' @@ -237,13 +299,6 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue, return meta; } -struct xenvif_rx_cb { - int meta_slots_used; - bool full_coalesce; -}; - -#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb) - /* * Set up the grant operations for this fragment. If it's a flipping * interface, we also set up the unmap request from here. @@ -587,7 +642,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_queue_head_init(&rxq); - while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) { + while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX) + && (skb = xenvif_rx_dequeue(queue)) != NULL) { RING_IDX max_slots_needed; RING_IDX old_req_cons; RING_IDX ring_slots_used; @@ -634,15 +690,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue) skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) max_slots_needed++; - /* If the skb may not fit then bail out now */ - if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) { - skb_queue_head(&queue->rx_queue, skb); - need_to_notify = true; - queue->rx_last_skb_slots = max_slots_needed; - break; - } else - queue->rx_last_skb_slots = 0; - old_req_cons = queue->rx.req_cons; XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue); ring_slots_used = queue->rx.req_cons - old_req_cons; @@ -1869,12 +1916,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) } } -static inline int rx_work_todo(struct xenvif_queue *queue) -{ - return (!skb_queue_empty(&queue->rx_queue) && - xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots)); -} - static inline int tx_work_todo(struct xenvif_queue *queue) { if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))) @@ -1931,92 +1972,64 @@ err: return err; } -static void xenvif_start_queue(struct xenvif_queue *queue) +static bool xenvif_have_rx_work(struct xenvif_queue *queue) { - if (xenvif_schedulable(queue->vif)) - xenvif_wake_queue(queue); + return (!skb_queue_empty(&queue->rx_queue) + && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)) + || kthread_should_stop() + || queue->vif->disabled; } -/* Only called from the queue's thread, it handles the situation when the guest - * doesn't post enough requests on the receiving ring. - * First xenvif_start_xmit disables QDisc and start a timer, and then either the - * timer fires, or the guest send an interrupt after posting new request. If it - * is the timer, the carrier is turned off here. - * */ -static void xenvif_rx_purge_event(struct xenvif_queue *queue) +static long xenvif_rx_queue_timeout(struct xenvif_queue *queue) { - /* Either the last unsuccesful skb or at least 1 slot should fit */ - int needed = queue->rx_last_skb_slots ? - queue->rx_last_skb_slots : 1; + struct sk_buff *skb; + long timeout; - /* It is assumed that if the guest post new slots after this, the RX - * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up - * the thread again - */ - set_bit(QUEUE_STATUS_RX_STALLED, &queue->status); - if (!xenvif_rx_ring_slots_available(queue, needed)) { - rtnl_lock(); - if (netif_carrier_ok(queue->vif->dev)) { - /* Timer fired and there are still no slots. Turn off - * everything except the interrupts - */ - netif_carrier_off(queue->vif->dev); - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - if (net_ratelimit()) - netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id); - } else { - /* Probably an another queue already turned the carrier - * off, make sure nothing is stucked in the internal - * queue of this queue - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - } - rtnl_unlock(); - } else if (!netif_carrier_ok(queue->vif->dev)) { - unsigned int num_queues = queue->vif->num_queues; - unsigned int i; - /* The carrier was down, but an interrupt kicked - * the thread again after new requests were - * posted - */ - clear_bit(QUEUE_STATUS_RX_STALLED, - &queue->status); - rtnl_lock(); - netif_carrier_on(queue->vif->dev); - netif_tx_wake_all_queues(queue->vif->dev); - rtnl_unlock(); + skb = skb_peek(&queue->rx_queue); + if (!skb) + return MAX_SCHEDULE_TIMEOUT; - for (i = 0; i < num_queues; i++) { - struct xenvif_queue *temp = &queue->vif->queues[i]; + timeout = XENVIF_RX_CB(skb)->expires - jiffies; + return timeout < 0 ? 0 : timeout; +} - xenvif_napi_schedule_or_enable_events(temp); - } - if (net_ratelimit()) - netdev_err(queue->vif->dev, "Carrier on again\n"); - } else { - /* Queuing were stopped, but the guest posted - * new requests and sent an interrupt - */ - clear_bit(QUEUE_STATUS_RX_STALLED, - &queue->status); - del_timer_sync(&queue->rx_stalled); - xenvif_start_queue(queue); +/* Wait until the guest Rx thread has work. + * + * The timeout needs to be adjusted based on the current head of the + * queue (and not just the head at the beginning). In particular, if + * the queue is initially empty an infinite timeout is used and this + * needs to be reduced when a skb is queued. + * + * This cannot be done with wait_event_timeout() because it only + * calculates the timeout once. + */ +static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) +{ + DEFINE_WAIT(wait); + + if (xenvif_have_rx_work(queue)) + return; + + for (;;) { + long ret; + + prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); + if (xenvif_have_rx_work(queue)) + break; + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); + if (!ret) + break; } + finish_wait(&queue->wq, &wait); } int xenvif_kthread_guest_rx(void *data) { struct xenvif_queue *queue = data; - struct sk_buff *skb; + struct xenvif *vif = queue->vif; - while (!kthread_should_stop()) { - wait_event_interruptible(queue->wq, - rx_work_todo(queue) || - queue->vif->disabled || - test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) || - kthread_should_stop()); + for (;;) { + xenvif_wait_for_rx_work(queue); if (kthread_should_stop()) break; @@ -2028,35 +2041,29 @@ int xenvif_kthread_guest_rx(void *data) * context so we defer it here, if this thread is * associated with queue 0. */ - if (unlikely(queue->vif->disabled && queue->id == 0)) { - xenvif_carrier_off(queue->vif); - } else if (unlikely(queue->vif->disabled)) { - /* kthread_stop() would be called upon this thread soon, - * be a bit proactive - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; - } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT, - &queue->status))) { - xenvif_rx_purge_event(queue); - } else if (!netif_carrier_ok(queue->vif->dev)) { - /* Another queue stalled and turned the carrier off, so - * purge the internal queue of queues which were not - * blocked - */ - skb_queue_purge(&queue->rx_queue); - queue->rx_last_skb_slots = 0; + if (unlikely(vif->disabled && queue->id == 0)) { + xenvif_carrier_off(vif); + xenvif_rx_queue_purge(queue); + continue; } if (!skb_queue_empty(&queue->rx_queue)) xenvif_rx_action(queue); + /* Queued packets may have foreign pages from other + * domains. These cannot be queued indefinitely as + * this would starve guests of grant refs and transmit + * slots. + */ + xenvif_rx_queue_drop_expired(queue); + + xenvif_rx_queue_maybe_wake(queue); + cond_resched(); } /* Bin any remaining skbs */ - while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) - dev_kfree_skb(skb); + xenvif_rx_queue_purge(queue); return 0; } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 9060857c9022..96a754d8e517 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -52,6 +52,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) struct xenvif_queue *queue = m->private; struct xen_netif_tx_back_ring *tx_ring = &queue->tx; struct xen_netif_rx_back_ring *rx_ring = &queue->rx; + struct netdev_queue *dev_queue; if (tx_ring->sring) { struct xen_netif_tx_sring *sring = tx_ring->sring; @@ -112,6 +113,13 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) queue->credit_timeout.expires, jiffies); + dev_queue = netdev_get_tx_queue(queue->vif->dev, queue->id); + + seq_printf(m, "\nRx internal queue: len %u max %u pkts %u %s\n", + queue->rx_queue_len, queue->rx_queue_max, + skb_queue_len(&queue->rx_queue), + netif_tx_queue_stopped(dev_queue) ? "stopped" : "running"); + return 0; } -- cgit v1.2.3-58-ga151 From ecf08d2dbb96d5a4b4bcc53a39e8d29cc8fef02e Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 22 Oct 2014 14:08:55 +0100 Subject: xen-netback: reintroduce guest Rx stall detection If a frontend not receiving packets it is useful to detect this and turn off the carrier so packets are dropped early instead of being queued and drained when they expire. A to-guest queue is stalled if it doesn't have enough free slots for a an extended period of time (default 60 s). If at least one queue is stalled, the carrier is turned off (in the expectation that the other queues will soon stall as well). The carrier is only turned on once all queues are ready. When the frontend connects, all the queues start in the stalled state and only become ready once the frontend queues enough Rx requests. Signed-off-by: David Vrabel Reviewed-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 5 +++ drivers/net/xen-netback/interface.c | 5 ++- drivers/net/xen-netback/netback.c | 76 +++++++++++++++++++++++++++++++++++++ drivers/net/xen-netback/xenbus.c | 1 + 4 files changed, 86 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index c2642402b7a1..083ecc93fe5e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -179,6 +179,8 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; + unsigned long last_rx_time; + bool stalled; struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS]; @@ -232,6 +234,9 @@ struct xenvif { /* Queues */ struct xenvif_queue *queues; unsigned int num_queues; /* active queues, resource allocated */ + unsigned int stalled_queues; + + spinlock_t lock; #ifdef CONFIG_DEBUG_FS struct dentry *xenvif_dbg_root; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index a134d52f55b4..895fe84011e7 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -419,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->queues = NULL; vif->num_queues = 0; + spin_lock_init(&vif->lock); + dev->netdev_ops = &xenvif_netdev_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -505,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif) dev_set_mtu(vif->dev, ETH_DATA_LEN); netdev_update_features(vif->dev); set_bit(VIF_STATUS_CONNECTED, &vif->status); - netif_carrier_on(vif->dev); if (netif_running(vif->dev)) xenvif_up(vif); rtnl_unlock(); @@ -565,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, disable_irq(queue->rx_irq); } + queue->stalled = true; + task = kthread_create(xenvif_kthread_guest_rx, (void *)queue, "%s-guest-rx", queue->name); if (IS_ERR(task)) { diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 57aa3b507d32..6563f0713fc0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -62,6 +62,13 @@ unsigned int rx_drain_timeout_msecs = 10000; module_param(rx_drain_timeout_msecs, uint, 0444); unsigned int rx_drain_timeout_jiffies; +/* The length of time before the frontend is considered unresponsive + * because it isn't providing Rx slots. + */ +static unsigned int rx_stall_timeout_msecs = 60000; +module_param(rx_stall_timeout_msecs, uint, 0444); +static unsigned int rx_stall_timeout_jiffies; + unsigned int xenvif_max_queues; module_param_named(max_queues, xenvif_max_queues, uint, 0644); MODULE_PARM_DESC(max_queues, @@ -649,6 +656,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue) RING_IDX ring_slots_used; int i; + queue->last_rx_time = jiffies; + /* We need a cheap worse case estimate for the number of * slots we'll use. */ @@ -1972,10 +1981,67 @@ err: return err; } +static void xenvif_queue_carrier_off(struct xenvif_queue *queue) +{ + struct xenvif *vif = queue->vif; + + queue->stalled = true; + + /* At least one queue has stalled? Disable the carrier. */ + spin_lock(&vif->lock); + if (vif->stalled_queues++ == 0) { + netdev_info(vif->dev, "Guest Rx stalled"); + netif_carrier_off(vif->dev); + } + spin_unlock(&vif->lock); +} + +static void xenvif_queue_carrier_on(struct xenvif_queue *queue) +{ + struct xenvif *vif = queue->vif; + + queue->last_rx_time = jiffies; /* Reset Rx stall detection. */ + queue->stalled = false; + + /* All queues are ready? Enable the carrier. */ + spin_lock(&vif->lock); + if (--vif->stalled_queues == 0) { + netdev_info(vif->dev, "Guest Rx ready"); + netif_carrier_on(vif->dev); + } + spin_unlock(&vif->lock); +} + +static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + return !queue->stalled + && prod - cons < XEN_NETBK_RX_SLOTS_MAX + && time_after(jiffies, + queue->last_rx_time + rx_stall_timeout_jiffies); +} + +static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + + prod = queue->rx.sring->req_prod; + cons = queue->rx.req_cons; + + return queue->stalled + && prod - cons >= XEN_NETBK_RX_SLOTS_MAX; +} + static bool xenvif_have_rx_work(struct xenvif_queue *queue) { return (!skb_queue_empty(&queue->rx_queue) && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)) + || xenvif_rx_queue_stalled(queue) + || xenvif_rx_queue_ready(queue) || kthread_should_stop() || queue->vif->disabled; } @@ -2050,6 +2116,15 @@ int xenvif_kthread_guest_rx(void *data) if (!skb_queue_empty(&queue->rx_queue)) xenvif_rx_action(queue); + /* If the guest hasn't provided any Rx slots for a + * while it's probably not responsive, drop the + * carrier so packets are dropped earlier. + */ + if (xenvif_rx_queue_stalled(queue)) + xenvif_queue_carrier_off(queue); + else if (xenvif_rx_queue_ready(queue)) + xenvif_queue_carrier_on(queue); + /* Queued packets may have foreign pages from other * domains. These cannot be queued indefinitely as * this would starve guests of grant refs and transmit @@ -2120,6 +2195,7 @@ static int __init netback_init(void) goto failed_init; rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); + rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs); #ifdef CONFIG_DEBUG_FS xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 96a754d8e517..4e56a27f9689 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -711,6 +711,7 @@ static void connect(struct backend_info *be) be->vif->queues = vzalloc(requested_num_queues * sizeof(struct xenvif_queue)); be->vif->num_queues = requested_num_queues; + be->vif->stalled_queues = requested_num_queues; for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) { queue = &be->vif->queues[queue_index]; -- cgit v1.2.3-58-ga151 From 349ce993ac706869d553a1816426d3a4bfda02b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 12:58:58 -0700 Subject: tcp: md5: do not use alloc_percpu() percpu tcp_md5sig_pool contains memory blobs that ultimately go through sg_set_buf(). -> sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); This requires that whole area is in a physically contiguous portion of memory. And that @buf is not backed by vmalloc(). Given that alloc_percpu() can use vmalloc() areas, this does not fit the requirements. Replace alloc_percpu() by a static DEFINE_PER_CPU() as tcp_md5sig_pool is small anyway, there is no gain to dynamically allocate it. Signed-off-by: Eric Dumazet Fixes: 765cf9976e93 ("tcp: md5: remove one indirection level in tcp_md5sig_pool") Reported-by: Crestez Dan Leonard Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 59 ++++++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1bec4e76d88c..39ec0c379545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt); #endif #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return raw_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } -- cgit v1.2.3-58-ga151 From fe0ca7328d03d36aafecebb3af650e1bb2841c20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Oct 2014 19:43:46 -0700 Subject: macvlan: fix a race on port dismantle and possible skb leaks We need to cancel the work queue after rcu grace period, otherwise it can be rescheduled by incoming packets. We need to purge queue if some skbs are still in it. We can use __skb_queue_head_init() variant in macvlan_process_broadcast() Signed-off-by: Eric Dumazet Fixes: 412ca1550cbec ("macvlan: Move broadcasts into a work queue") Cc: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 29b3bb410781..bfb0b6ec8c56 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -272,7 +272,7 @@ static void macvlan_process_broadcast(struct work_struct *w) struct sk_buff *skb; struct sk_buff_head list; - skb_queue_head_init(&list); + __skb_queue_head_init(&list); spin_lock_bh(&port->bc_queue.lock); skb_queue_splice_tail_init(&port->bc_queue, &list); @@ -1082,9 +1082,15 @@ static void macvlan_port_destroy(struct net_device *dev) { struct macvlan_port *port = macvlan_port_get_rtnl(dev); - cancel_work_sync(&port->bc_work); dev->priv_flags &= ~IFF_MACVLAN_PORT; netdev_rx_handler_unregister(dev); + + /* After this point, no packet can schedule bc_work anymore, + * but we need to cancel it and purge left skbs if any. + */ + cancel_work_sync(&port->bc_work); + __skb_queue_purge(&port->bc_queue); + kfree_rcu(port, rcu); } -- cgit v1.2.3-58-ga151 From 013f6579c6e4f9517127a176bfc37bbac0b766cb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Oct 2014 20:06:29 -0700 Subject: i40e: _MASK vs _SHIFT typo in i40e_handle_mdd_event() We accidentally mask by the _SHIFT variable. It means that "event" is always zero. Signed-off-by: Dan Carpenter Tested-by: Jim Young Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ed5f1c15fb0f..c3a7f4a4b775 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6151,7 +6151,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >> I40E_GL_MDET_TX_VF_NUM_SHIFT; - u8 event = (reg & I40E_GL_MDET_TX_EVENT_SHIFT) >> + u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; @@ -6165,7 +6165,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; - u8 event = (reg & I40E_GL_MDET_RX_EVENT_SHIFT) >> + u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; -- cgit v1.2.3-58-ga151 From b71e821de50f0ff92f10f33064ee1713e9014158 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 23 Oct 2014 10:25:53 +0200 Subject: drivers: net: xgene: Rewrite buggy loop in xgene_enet_ecc_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’: drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function Depending on the arbitrary value on the stack, the loop may terminate too early, and cause a bogus -ENODEV failure. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index e6d24c210198..c22f32622fa9 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -124,20 +124,18 @@ static int xgene_enet_ecc_init(struct xgene_enet_pdata *p) { struct net_device *ndev = p->ndev; u32 data; - int i; + int i = 0; xgene_enet_wr_diag_csr(p, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0); - for (i = 0; i < 10 && data != ~0U ; i++) { + do { usleep_range(100, 110); data = xgene_enet_rd_diag_csr(p, ENET_BLOCK_MEM_RDY_ADDR); - } + if (data == ~0U) + return 0; + } while (++i < 10); - if (data != ~0U) { - netdev_err(ndev, "Failed to release memory from shutdown\n"); - return -ENODEV; - } - - return 0; + netdev_err(ndev, "Failed to release memory from shutdown\n"); + return -ENODEV; } static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *p) -- cgit v1.2.3-58-ga151 From 96e4be06cbfcb8c9c2da7c77bacce0e56b581c0b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 23 Oct 2014 15:57:26 +0300 Subject: net/mlx5_core: Call synchronize_irq() before freeing EQ buffer After destroying the EQ, the object responsible for generating interrupts, call synchronize_irq() to ensure that any handler routines running on other CPU cores finish execution. Only then free the EQ buffer. This patch solves a very rare case when we get panic on driver unload. The same thing is done when we destroy a CQ which is one of the sources generating interrupts. In the case of CQ we want to avoid completion handlers on a CQ that was destroyed. In the case we do the same to avoid receiving asynchronous events after the EQ has been destroyed and its buffers freed. Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ed53291468f3..a278238a2db6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -420,6 +420,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); + synchronize_irq(table->msix_arr[eq->irqn].vector); mlx5_buf_free(dev, &eq->buf); return err; -- cgit v1.2.3-58-ga151 From bf1bac5b7882daa41249f85fbc97828f0597de5c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 23 Oct 2014 15:57:27 +0300 Subject: net/mlx4_core: Call synchronize_irq() before freeing EQ buffer After moving the EQ ownership to software effectively destroying it, call synchronize_irq() to ensure that any handler routines running on other CPU cores finish execution. Only then free the EQ buffer. The same thing is done when we destroy a CQ which is one of the sources generating interrupts. In the case of CQ we want to avoid completion handlers on a CQ that was destroyed. In the case we do the same to avoid receiving asynchronous events after the EQ has been destroyed and its buffers freed. Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index a49c9d11d8a5..49290a405903 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1026,6 +1026,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, pr_cont("\n"); } } + synchronize_irq(eq->irq); mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) -- cgit v1.2.3-58-ga151 From 93a35f59f1b13a02674877e3efdf07ae47e34052 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 06:30:30 -0700 Subject: net: napi_reuse_skb() should check pfmemalloc Do not reuse skb if it was pfmemalloc tainted, otherwise future frame might be dropped anyway. Signed-off-by: Eric Dumazet Signed-off-by: Roman Gushchin Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index b793e3521a36..945bbd001359 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4157,6 +4157,10 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + if (unlikely(skb->pfmemalloc)) { + consume_skb(skb); + return; + } __skb_pull(skb, skb_headlen(skb)); /* restore the reserve we had after netdev_alloc_skb_ip_align() */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); -- cgit v1.2.3-58-ga151 From 7965ee93719921ea5978f331da653dfa2d7b99f5 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Sun, 26 Oct 2014 12:22:40 +0100 Subject: netfilter: nft_compat: fix wrong target lookup in nft_target_select_ops() The code looks for an already loaded target, and the correct list to search is nft_target_list, not nft_match_list. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 0480f57a4eb6..9d6d6f60a80f 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -672,7 +672,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, family = ctx->afi->family; /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_match_list, head) { + list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && -- cgit v1.2.3-58-ga151 From 2376c879b80c83424a3013834be97fb9fe2d4180 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Thu, 23 Oct 2014 14:37:30 -0700 Subject: cxgb4 : Improve handling of DCB negotiation or loss thereof Clear out any DCB apps we might have added to kernel table when we lose DCB sync (or IEEE equivalent event). These were previously left behind and not cleaned up correctly. IEEE allows individual components to work independently, so improve check for IEEE completion by specifying individual components. Fixes: 10b0046685ab ("cxgb4: IEEE fixes for DCBx state machine") Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 48 ++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 8edf0f5bd679..ee819fd12bd2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -60,6 +60,42 @@ void cxgb4_dcb_version_init(struct net_device *dev) dcb->dcb_version = FW_PORT_DCB_VER_AUTO; } +static void cxgb4_dcb_cleanup_apps(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = pi->adapter; + struct port_dcb_info *dcb = &pi->dcb; + struct dcb_app app; + int i, err; + + /* zero priority implies remove */ + app.priority = 0; + + for (i = 0; i < CXGB4_MAX_DCBX_APP_SUPPORTED; i++) { + /* Check if app list is exhausted */ + if (!dcb->app_priority[i].protocolid) + break; + + app.protocol = dcb->app_priority[i].protocolid; + + if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) { + app.selector = dcb->app_priority[i].sel_field + 1; + err = dcb_ieee_setapp(dev, &app); + } else { + app.selector = !!(dcb->app_priority[i].sel_field); + err = dcb_setapp(dev, &app); + } + + if (err) { + dev_err(adap->pdev_dev, + "Failed DCB Clear %s Application Priority: sel=%d, prot=%d, , err=%d\n", + dcb_ver_array[dcb->dcb_version], app.selector, + app.protocol, -err); + break; + } + } +} + /* Finite State machine for Data Center Bridging. */ void cxgb4_dcb_state_fsm(struct net_device *dev, @@ -145,6 +181,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, * state. We need to reset back to a ground state * of incomplete. */ + cxgb4_dcb_cleanup_apps(dev); cxgb4_dcb_state_init(dev); dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; dcb->supported = CXGB4_DCBX_FW_SUPPORT; @@ -833,11 +870,16 @@ static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id, /* Return whether IEEE Data Center Bridging has been negotiated. */ -static inline int cxgb4_ieee_negotiation_complete(struct net_device *dev) +static inline int +cxgb4_ieee_negotiation_complete(struct net_device *dev, + enum cxgb4_dcb_fw_msgs dcb_subtype) { struct port_info *pi = netdev2pinfo(dev); struct port_dcb_info *dcb = &pi->dcb; + if (dcb_subtype && !(dcb->msgs & dcb_subtype)) + return 0; + return (dcb->state == CXGB4_DCB_STATE_FW_ALLSYNCED && (dcb->supported & DCB_CAP_DCBX_VER_IEEE)); } @@ -850,7 +892,7 @@ static int cxgb4_ieee_getapp(struct net_device *dev, struct dcb_app *app) { int prio; - if (!cxgb4_ieee_negotiation_complete(dev)) + if (!cxgb4_ieee_negotiation_complete(dev, CXGB4_DCB_FW_APP_ID)) return -EINVAL; if (!(app->selector && app->protocol)) return -EINVAL; @@ -872,7 +914,7 @@ static int cxgb4_ieee_setapp(struct net_device *dev, struct dcb_app *app) { int ret; - if (!cxgb4_ieee_negotiation_complete(dev)) + if (!cxgb4_ieee_negotiation_complete(dev, CXGB4_DCB_FW_APP_ID)) return -EINVAL; if (!(app->selector && app->protocol)) return -EINVAL; -- cgit v1.2.3-58-ga151 From 3bb062613b1ecbd0c388106f61344d699f7859ec Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Thu, 23 Oct 2014 14:37:31 -0700 Subject: cxgb4 : Handle dcb enable correctly Disabling DCBx in firmware automatically enables DCBx for control via host lldp agents. Wait for an explicit setstate call from an lldp agents to enable DCBx instead. Fixes: 76bcb31efc06 ("cxgb4 : Add DCBx support codebase and dcbnl_ops") Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 7 ++++++- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index ee819fd12bd2..6fe300e316c3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -116,7 +116,6 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, /* we're going to use Host DCB */ dcb->state = CXGB4_DCB_STATE_HOST; dcb->supported = CXGB4_DCBX_HOST_SUPPORT; - dcb->enabled = 1; break; } @@ -386,6 +385,12 @@ static u8 cxgb4_setstate(struct net_device *dev, u8 enabled) { struct port_info *pi = netdev2pinfo(dev); + /* If DCBx is host-managed, dcb is enabled by outside lldp agents */ + if (pi->dcb.state == CXGB4_DCB_STATE_HOST) { + pi->dcb.enabled = enabled; + return 0; + } + /* Firmware doesn't provide any mechanism to control the DCB state. */ if (enabled != (pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED)) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3f60070f2519..97683c1c5b69 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -694,7 +694,11 @@ int cxgb4_dcb_enabled(const struct net_device *dev) #ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); - return pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED; + if (!pi->dcb.enabled) + return 0; + + return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) || + (pi->dcb.state == CXGB4_DCB_STATE_HOST)); #else return 0; #endif -- cgit v1.2.3-58-ga151 From f89b7755f517cdbb755d7543eef986ee9d54e654 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 23 Oct 2014 18:41:08 -0700 Subject: bpf: split eBPF out of NET introduce two configs: - hidden CONFIG_BPF to select eBPF interpreter that classic socket filters depend on - visible CONFIG_BPF_SYSCALL (default off) that tracing and sockets can use that solves several problems: - tracing and others that wish to use eBPF don't need to depend on NET. They can use BPF_SYSCALL to allow loading from userspace or select BPF to use it directly from kernel in NET-less configs. - in 3.18 programs cannot be attached to events yet, so don't force it on - when the rest of eBPF infra is there in 3.19+, it's still useful to switch it off to minimize kernel size bloat-o-meter on x64 shows: add/remove: 0/60 grow/shrink: 0/2 up/down: 0/-15601 (-15601) tested with many different config combinations. Hopefully didn't miss anything. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- init/Kconfig | 14 ++++++++++++++ kernel/Makefile | 2 +- kernel/bpf/Makefile | 6 +++--- kernel/bpf/core.c | 9 +++++++++ net/Kconfig | 2 +- 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 3ee28ae02cc8..2081a4d3d917 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1341,6 +1341,10 @@ config SYSCTL_ARCH_UNALIGN_ALLOW config HAVE_PCSPKR_PLATFORM bool +# interpreter that classic socket filters depend on +config BPF + bool + menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible @@ -1521,6 +1525,16 @@ config EVENTFD If unsure, say Y. +# syscall, maps, verifier +config BPF_SYSCALL + bool "Enable bpf() system call" if EXPERT + select ANON_INODES + select BPF + default n + help + Enable the bpf() system call that allows to manipulate eBPF + programs and maps via file descriptors. + config SHMEM bool "Use full shmem filesystem" if EXPERT default y diff --git a/kernel/Makefile b/kernel/Makefile index dc5c77544fd6..17ea6d4a9a24 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,7 +86,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o -obj-$(CONFIG_NET) += bpf/ +obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 45427239f375..0daf7f6ae7df 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,5 +1,5 @@ -obj-y := core.o syscall.o verifier.o - +obj-y := core.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o ifdef CONFIG_TEST_BPF -obj-y += test_stub.o +obj-$(CONFIG_BPF_SYSCALL) += test_stub.o endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f0c30c59b317..d6594e457a25 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -655,3 +655,12 @@ void bpf_prog_free(struct bpf_prog *fp) schedule_work(&aux->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); + +/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call + * skb_copy_bits(), so provide a weak definition of it for NET-less config. + */ +int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, + int len) +{ + return -EFAULT; +} diff --git a/net/Kconfig b/net/Kconfig index 6272420a721b..99815b5454bf 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -6,7 +6,7 @@ menuconfig NET bool "Networking support" select NLATTR select GENERIC_NET_UTILS - select ANON_INODES + select BPF ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even -- cgit v1.2.3-58-ga151 From 3d53666b40007b55204ee8890618da79a20c9940 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Mon, 6 Oct 2014 08:46:19 -0700 Subject: ipvs: Avoid null-pointer deref in debug code Use daddr instead of reaching into dest. Reported-by: Dan Carpenter Signed-off-by: Alex Gartrell Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 91f17c1eb8a2..437a3663ad03 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -316,7 +316,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI4\n", &dest->addr.ip); + " daddr=%pI4\n", &daddr); goto err_put; } @@ -458,7 +458,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI6\n", &dest->addr.in6); + " daddr=%pI6\n", daddr); goto err_put; } -- cgit v1.2.3-58-ga151 From ae439286a0dec99cc8029868243689b5b5f3ff75 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 24 Oct 2014 23:44:04 +0200 Subject: net: dsa: Error out on tagging protocol mismatches If there is a mismatch between enabled tagging protocols and the protocol the switch supports, error out, rather than continue with a situation which is unlikely to work. Signed-off-by: Andrew Lunn cc: alexander.h.duyck@intel.com Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 22f34cf4cb27..6317b41c99b0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -174,8 +174,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, dst->rcv = brcm_netdev_ops.rcv; break; #endif - default: + case DSA_TAG_PROTO_NONE: break; + default: + ret = -ENOPROTOOPT; + goto out; } dst->tag_protocol = drv->tag_protocol; -- cgit v1.2.3-58-ga151 From c146b7788e5721ec15bc0197bedf75849508e7ea Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 24 Oct 2014 23:44:05 +0200 Subject: dsa: mv88e6171: Fix tagging protocol/Kconfig The mv88e6171 can support two different tagging protocols, DSA and EDSA. The switch driver structure only allows one protocol to be enumerated, and DSA was chosen. However the Kconfig entry ensures the EDSA tagging code is built. With a minimal configuration, we then end up with a mismatch. The probe is successful, EDSA tagging is used, but the switch is configured for DSA, resulting in mangled packets. Change the switch driver structure to enumerate EDSA, fixing the mismatch. Signed-off-by: Andrew Lunn Fixes: 42f272539487 ("net: DSA: Marvell mv88e6171 switch driver") Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6171.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 1020a7af67cf..78d8e876f3aa 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -395,7 +395,7 @@ static int mv88e6171_get_sset_count(struct dsa_switch *ds) } struct dsa_switch_driver mv88e6171_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_DSA, + .tag_protocol = DSA_TAG_PROTO_EDSA, .priv_size = sizeof(struct mv88e6xxx_priv_state), .probe = mv88e6171_probe, .setup = mv88e6171_setup, -- cgit v1.2.3-58-ga151 From 47276fcc2d542e7b15e384c08b1709c1921b06c1 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Fri, 24 Oct 2014 18:51:33 +0530 Subject: drivers: net:cpsw: fix probe_dt when only slave 1 is pinned out when slave 0 has no phy and slave 1 connected to phy, driver probe will fail as there is no phy id present for slave 0 device tree, so continuing even though no phy-id found, also moving mac-id read later to ensure mac-id is read from device tree even when phy-id entry in not found. Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 952e1e4764b7..d81b84b5e3df 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2006,7 +2006,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, parp = of_get_property(slave_node, "phy_id", &lenp); if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i); - return -EINVAL; + goto no_phy_slave; } mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); @@ -2019,6 +2019,14 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); + slave_data->phy_if = of_get_phy_mode(slave_node); + if (slave_data->phy_if < 0) { + dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n", + i); + return slave_data->phy_if; + } + +no_phy_slave: mac_addr = of_get_mac_address(slave_node); if (mac_addr) { memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); @@ -2030,14 +2038,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, return ret; } } - - slave_data->phy_if = of_get_phy_mode(slave_node); - if (slave_data->phy_if < 0) { - dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n", - i); - return slave_data->phy_if; - } - if (data->dual_emac) { if (of_property_read_u32(slave_node, "dual_emac_res_vlan", &prop)) { -- cgit v1.2.3-58-ga151 From 99d881f993f066c75059d24e44c74f7a3fc199bc Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Sun, 26 Oct 2014 14:22:24 -0500 Subject: net: phy: Add SGMII Configuration for Marvell 88E1145 Initialization Marvell phy 88E1145 configuration & initialization was missing a case for initializing SGMII mode. This patch adds that case. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index bd37e45c89c0..225c033b08f3 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -50,10 +50,15 @@ #define MII_M1011_PHY_SCR 0x10 #define MII_M1011_PHY_SCR_AUTO_CROSS 0x0060 +#define MII_M1145_PHY_EXT_SR 0x1b #define MII_M1145_PHY_EXT_CR 0x14 #define MII_M1145_RGMII_RX_DELAY 0x0080 #define MII_M1145_RGMII_TX_DELAY 0x0002 +#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK 0x4 +#define MII_M1145_HWCFG_MODE_MASK 0xf +#define MII_M1145_HWCFG_FIBER_COPPER_AUTO 0x8000 + #define MII_M1111_PHY_LED_CONTROL 0x18 #define MII_M1111_PHY_LED_DIRECT 0x4100 #define MII_M1111_PHY_LED_COMBINE 0x411c @@ -676,6 +681,20 @@ static int m88e1145_config_init(struct phy_device *phydev) } } + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + int temp = phy_read(phydev, MII_M1145_PHY_EXT_SR); + if (temp < 0) + return temp; + + temp &= ~MII_M1145_HWCFG_MODE_MASK; + temp |= MII_M1145_HWCFG_MODE_SGMII_NO_CLK; + temp |= MII_M1145_HWCFG_FIBER_COPPER_AUTO; + + err = phy_write(phydev, MII_M1145_PHY_EXT_SR, temp); + if (err < 0) + return err; + } + err = marvell_of_reg_init(phydev); if (err < 0) return err; -- cgit v1.2.3-58-ga151 From ebcf34f3d4be11f994340aff629f3c17171a4f65 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 26 Oct 2014 19:14:06 -0700 Subject: skbuff.h: fix kernel-doc warning for headers_end Fix kernel-doc warning in by making both headers_start and headers_end private fields. Warning(..//include/linux/skbuff.h:654): No description found for parameter 'headers_end[0]' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a59d9343c25b..5884f95ff0e9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -557,7 +557,9 @@ struct sk_buff { /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() */ + /* private: */ __u32 headers_start[0]; + /* public: */ /* if you move pkt_type around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD @@ -642,7 +644,9 @@ struct sk_buff { __u16 network_header; __u16 mac_header; + /* private: */ __u32 headers_end[0]; + /* public: */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; -- cgit v1.2.3-58-ga151 From 8edf0047f4b8e03d94ef88f5a7dec146cce03a06 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 28 Oct 2014 11:12:00 -0700 Subject: net: systemport: enable RX interrupts after NAPI There is currently a small window during which the SYSTEMPORT adapter enables its RX interrupts without having enabled its NAPI handler, which can result in packets to be discarded during interface bringup. A similar but more serious window exists in bcm_sysport_resume() during which we can have the RDMA engine not fully prepared to receive packets and yet having RX interrupts enabled. Fix this my moving the RX interrupt enable down to bcm_sysport_netif_start() after napi_enable() for the RX path is called, which fixes both call sites: bcm_sysport_open() and bcm_sysport_resume(). Fixes: b02e6d9ba7ad ("net: systemport: add bcm_sysport_netif_{enable,stop}") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 9ae36979bdee..7dce91189e51 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1397,6 +1397,9 @@ static void bcm_sysport_netif_start(struct net_device *dev) /* Enable NAPI */ napi_enable(&priv->napi); + /* Enable RX interrupt and TX ring full interrupt */ + intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE | INTRL2_0_TX_RING_FULL); + phy_start(priv->phydev); /* Enable TX interrupts for the 32 TXQs */ @@ -1499,9 +1502,6 @@ static int bcm_sysport_open(struct net_device *dev) if (ret) goto out_free_rx_ring; - /* Enable RX interrupt and TX ring full interrupt */ - intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE | INTRL2_0_TX_RING_FULL); - /* Turn on TDMA */ ret = tdma_enable_set(priv, 1); if (ret) @@ -1885,9 +1885,6 @@ static int bcm_sysport_resume(struct device *d) netif_device_attach(dev); - /* Enable RX interrupt and TX ring full interrupt */ - intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE | INTRL2_0_TX_RING_FULL); - /* RX pipe enable */ topctrl_writel(priv, 0, RX_FLUSH_CNTL); -- cgit v1.2.3-58-ga151 From 704d33e7006f20f9b4fa7d24a0f08c4b5919b131 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 28 Oct 2014 11:12:01 -0700 Subject: net: systemport: reset UniMAC coming out of a suspend cycle bcm_sysport_resume() was missing an UniMAC reset which can lead to various receive FIFO corruptions coming out of a suspend cycle. If the RX FIFO is stuck, it will deliver corrupted/duplicate packets towards the host CPU interface. This could be reproduced on crowded network and when Wake-on-LAN is enabled for this particular interface because the switch still forwards packets towards the host CPU interface (SYSTEMPORT), and we had to leave the UniMAC RX enable bit on to allow matching MagicPackets. Once we re-enter the resume function, there is a small window during which the UniMAC receive is still enabled, and we start queueing packets, but the RDMA and RBUF engines are not ready, which leads to having packets stuck in the UniMAC RX FIFO, ultimately delivered towards the host CPU as corrupted. Fixes: 40755a0fce17 ("net: systemport: add suspend and resume support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 7dce91189e51..3a6778a667f4 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1858,6 +1858,8 @@ static int bcm_sysport_resume(struct device *d) if (!netif_running(dev)) return 0; + umac_reset(priv); + /* We may have been suspended and never received a WOL event that * would turn off MPD detection, take care of that now */ -- cgit v1.2.3-58-ga151 From 1efed2d06c703489342ab6af2951683e07509c99 Mon Sep 17 00:00:00 2001 From: Olivier Blin Date: Fri, 24 Oct 2014 19:43:00 +0200 Subject: usbnet: add a callback for set_rx_mode To delegate promiscuous mode and multicast filtering to the subdriver. Signed-off-by: Olivier Blin Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 20 ++++++++++++++++++++ include/linux/usb/usbnet.h | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 20615bbd693b..3a6770a65d78 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1052,6 +1052,21 @@ static void __handle_link_change(struct usbnet *dev) clear_bit(EVENT_LINK_CHANGE, &dev->flags); } +static void usbnet_set_rx_mode(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + + usbnet_defer_kevent(dev, EVENT_SET_RX_MODE); +} + +static void __handle_set_rx_mode(struct usbnet *dev) +{ + if (dev->driver_info->set_rx_mode) + (dev->driver_info->set_rx_mode)(dev); + + clear_bit(EVENT_SET_RX_MODE, &dev->flags); +} + /* work that cannot be done in interrupt context uses keventd. * * NOTE: with 2.5 we could do more of this using completion callbacks, @@ -1157,6 +1172,10 @@ skip_reset: if (test_bit (EVENT_LINK_CHANGE, &dev->flags)) __handle_link_change(dev); + if (test_bit (EVENT_SET_RX_MODE, &dev->flags)) + __handle_set_rx_mode(dev); + + if (dev->flags) netdev_dbg(dev->net, "kevent done, flags = 0x%lx\n", dev->flags); } @@ -1525,6 +1544,7 @@ static const struct net_device_ops usbnet_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_set_rx_mode = usbnet_set_rx_mode, .ndo_change_mtu = usbnet_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 26088feb6608..d9a4905e01d0 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -78,6 +78,7 @@ struct usbnet { # define EVENT_NO_RUNTIME_PM 9 # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 +# define EVENT_SET_RX_MODE 12 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -159,6 +160,9 @@ struct driver_info { /* called by minidriver when receiving indication */ void (*indication)(struct usbnet *dev, void *ind, int indlen); + /* rx mode change (device changes address list filtering) */ + void (*set_rx_mode)(struct usbnet *dev); + /* for new devices, use the descriptor-reading code instead */ int in; /* rx endpoint */ int out; /* tx endpoint */ -- cgit v1.2.3-58-ga151 From d80c679bc1526183f1cf4adc54b0b72e8798555e Mon Sep 17 00:00:00 2001 From: Olivier Blin Date: Fri, 24 Oct 2014 19:43:01 +0200 Subject: cdc-ether: extract usbnet_cdc_update_filter function This will be used by the set_rx_mode callback. Also move a comment about multicast filtering in this new function. Signed-off-by: Olivier Blin Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 2a32d9167d3b..bee3689beeb4 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -67,6 +67,32 @@ static const u8 mbm_guid[16] = { 0xa6, 0x07, 0xc0, 0xff, 0xcb, 0x7e, 0x39, 0x2a, }; +static void usbnet_cdc_update_filter(struct usbnet *dev) +{ + struct cdc_state *info = (void *) &dev->data; + struct usb_interface *intf = info->control; + + u16 cdc_filter = + USB_CDC_PACKET_TYPE_ALL_MULTICAST | USB_CDC_PACKET_TYPE_DIRECTED | + USB_CDC_PACKET_TYPE_BROADCAST; + + /* FIXME cdc-ether has some multicast code too, though it complains + * in routine cases. info->ether describes the multicast support. + * Implement that here, manipulating the cdc filter as needed. + */ + + usb_control_msg(dev->udev, + usb_sndctrlpipe(dev->udev, 0), + USB_CDC_SET_ETHERNET_PACKET_FILTER, + USB_TYPE_CLASS | USB_RECIP_INTERFACE, + cdc_filter, + intf->cur_altsetting->desc.bInterfaceNumber, + NULL, + 0, + USB_CTRL_SET_TIMEOUT + ); +} + /* probes control interface, claims data interface, collects the bulk * endpoints, activates data interface (if needed), maybe sets MTU. * all pure cdc, except for certain firmware workarounds, and knowing @@ -347,16 +373,8 @@ next_desc: * don't do reset all the way. So the packet filter should * be set to a sane initial value. */ - usb_control_msg(dev->udev, - usb_sndctrlpipe(dev->udev, 0), - USB_CDC_SET_ETHERNET_PACKET_FILTER, - USB_TYPE_CLASS | USB_RECIP_INTERFACE, - USB_CDC_PACKET_TYPE_ALL_MULTICAST | USB_CDC_PACKET_TYPE_DIRECTED | USB_CDC_PACKET_TYPE_BROADCAST, - intf->cur_altsetting->desc.bInterfaceNumber, - NULL, - 0, - USB_CTRL_SET_TIMEOUT - ); + usbnet_cdc_update_filter(dev); + return 0; bad_desc: @@ -468,10 +486,6 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) return status; } - /* FIXME cdc-ether has some multicast code too, though it complains - * in routine cases. info->ether describes the multicast support. - * Implement that here, manipulating the cdc filter as needed. - */ return 0; } EXPORT_SYMBOL_GPL(usbnet_cdc_bind); -- cgit v1.2.3-58-ga151 From b77e26d191590c73b4a982ea3b3b87194069a56a Mon Sep 17 00:00:00 2001 From: Olivier Blin Date: Fri, 24 Oct 2014 19:43:02 +0200 Subject: cdc-ether: handle promiscuous mode with a set_rx_mode callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Promiscuous mode was not supported anymore with my Lenovo adapters (RTL8153) since commit c472ab68ad67db23c9907a27649b7dc0899b61f9 (cdc-ether: clean packet filter upon probe). It was not possible to use them in a bridge anymore. Signed-off-by: Olivier Blin Also-analyzed-by: Loïc Yhuel Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index bee3689beeb4..d3920b54a92c 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -76,6 +76,9 @@ static void usbnet_cdc_update_filter(struct usbnet *dev) USB_CDC_PACKET_TYPE_ALL_MULTICAST | USB_CDC_PACKET_TYPE_DIRECTED | USB_CDC_PACKET_TYPE_BROADCAST; + if (dev->net->flags & IFF_PROMISC) + cdc_filter |= USB_CDC_PACKET_TYPE_PROMISCUOUS; + /* FIXME cdc-ether has some multicast code too, though it complains * in routine cases. info->ether describes the multicast support. * Implement that here, manipulating the cdc filter as needed. @@ -496,6 +499,7 @@ static const struct driver_info cdc_info = { .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, + .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, }; @@ -505,6 +509,7 @@ static const struct driver_info wwan_info = { .bind = usbnet_cdc_bind, .unbind = usbnet_cdc_unbind, .status = usbnet_cdc_status, + .set_rx_mode = usbnet_cdc_update_filter, .manage_power = usbnet_manage_power, }; -- cgit v1.2.3-58-ga151 From d56109020d93337545dd257a790cb429a70acfad Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 24 Oct 2014 16:55:58 -0700 Subject: sch_pie: schedule the timer after all init succeed Cc: Vijay Subramanian Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Eric Dumazet --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 33d7a98a7a97..b783a446d884 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -445,7 +445,6 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = q->params.limit; setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); - mod_timer(&q->adapt_timer, jiffies + HZ / 2); if (opt) { int err = pie_change(sch, opt); @@ -454,6 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) return err; } + mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; } -- cgit v1.2.3-58-ga151 From b2ed64a97430a26a63c6ea91c9b50e639a98dfbc Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 27 Oct 2014 17:39:16 +0100 Subject: ipv6: notify userspace when we added or changed an ipv6 token NetworkManager might want to know that it changed when the router advertisement arrives. Signed-off-by: Lubomir Rintel Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 725c763270a0..0169ccf5aa4f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4531,6 +4531,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + inet6_ifinfo_notify(RTM_NEWLINK, idev); addrconf_verify_rtnl(); return 0; } -- cgit v1.2.3-58-ga151 From cd03cf0158449f9f4c19ecb54dfc97d9bd86eeeb Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 27 Oct 2014 23:22:10 +0530 Subject: cxgb4vf: Replace repetitive pci device ID's with right ones Replaced repetive Device ID's which got added in commit b961f9a48844ecf3 ("cxgb4vf: Remove superfluous "idx" parameter of CH_DEVICE() macro") Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index bfa398d91826..0b42bddaf284 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2929,14 +2929,14 @@ static const struct pci_device_id cxgb4vf_pci_tbl[] = { CH_DEVICE(0x480d), /* T480-cr */ CH_DEVICE(0x480e), /* T440-lp-cr */ CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), - CH_DEVICE(0x4880), + CH_DEVICE(0x4881), + CH_DEVICE(0x4882), + CH_DEVICE(0x4883), + CH_DEVICE(0x4884), + CH_DEVICE(0x4885), + CH_DEVICE(0x4886), + CH_DEVICE(0x4887), + CH_DEVICE(0x4888), CH_DEVICE(0x5801), /* T520-cr */ CH_DEVICE(0x5802), /* T522-cr */ CH_DEVICE(0x5803), /* T540-cr */ -- cgit v1.2.3-58-ga151 From 8f4eb70059ee834522ce90a6fce0aa3078c18620 Mon Sep 17 00:00:00 2001 From: Tej Parkash Date: Tue, 28 Oct 2014 01:18:15 -0400 Subject: cnic: Update the rcu_access_pointer() usages 1. Remove the rcu_read_lock/unlock around rcu_access_pointer 2. Replace the rcu_dereference with rcu_access_pointer Signed-off-by: Tej Parkash Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/cnic.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 23f23c97c2ad..f05fab65d78a 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -382,10 +382,8 @@ static int cnic_iscsi_nl_msg_recv(struct cnic_dev *dev, u32 msg_type, if (l5_cid >= MAX_CM_SK_TBL_SZ) break; - rcu_read_lock(); if (!rcu_access_pointer(cp->ulp_ops[CNIC_ULP_L4])) { rc = -ENODEV; - rcu_read_unlock(); break; } csk = &cp->csk_tbl[l5_cid]; @@ -414,7 +412,6 @@ static int cnic_iscsi_nl_msg_recv(struct cnic_dev *dev, u32 msg_type, } } csk_put(csk); - rcu_read_unlock(); rc = 0; } } @@ -615,7 +612,7 @@ static int cnic_unregister_device(struct cnic_dev *dev, int ulp_type) cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL); mutex_lock(&cnic_lock); - if (rcu_dereference(cp->ulp_ops[ulp_type])) { + if (rcu_access_pointer(cp->ulp_ops[ulp_type])) { RCU_INIT_POINTER(cp->ulp_ops[ulp_type], NULL); cnic_put(dev); } else { -- cgit v1.2.3-58-ga151 From 65ba1f1ec0eff1c25933468e1d238201c0c2cb29 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Oct 2014 10:30:34 +0100 Subject: inet: frags: fix a race between inet_evict_bucket and inet_frag_kill When the evictor is running it adds some chosen frags to a local list to be evicted once the chain lock has been released but at the same time the *frag_queue can be running for some of the same queues and it may call inet_frag_kill which will wait on the chain lock and will then delete the queue from the wrong list since it was added in the eviction one. The fix is simple - check if the queue has the evict flag set under the chain lock before deleting it, this is safe because the evict flag is set only under that lock and having the flag set also means that the queue has been detached from the chain list, so no need to delete it again. An important note to make is that we're safe w.r.t refcnt because inet_frag_kill and inet_evict_bucket will sync on the del_timer operation where only one of the two can succeed (or if the timer is executing - none of them), the cases are: 1. inet_frag_kill succeeds in del_timer - then the timer ref is removed, but inet_evict_bucket will not add this queue to its expire list but will restart eviction in that chain 2. inet_evict_bucket succeeds in del_timer - then the timer ref is kept until the evictor "expires" the queue, but inet_frag_kill will remove the initial ref and will set INET_FRAG_COMPLETE which will make the frag_expire fn just to remove its ref. In the end all of the queue users will do an inet_frag_put and the one that reaches 0 will free it. The refcount balance should be okay. CC: Florian Westphal CC: Eric Dumazet CC: Patrick McLean Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Suggested-by: Eric Dumazet Reported-by: Patrick McLean Tested-by: Patrick McLean Signed-off-by: Nikolay Aleksandrov Reviewed-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 9eb89f3f0ee4..894ec30c5896 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -285,7 +285,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - hlist_del(&fq->list); + if (!(fq->flags & INET_FRAG_EVICTED)) + hlist_del(&fq->list); spin_unlock(&hb->chain_lock); } -- cgit v1.2.3-58-ga151 From d70127e8a942364de8dd140fe73893efda363293 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Oct 2014 10:44:01 +0100 Subject: inet: frags: remove the WARN_ON from inet_evict_bucket The WARN_ON in inet_evict_bucket can be triggered by a valid case: inet_frag_kill and inet_evict_bucket can be running in parallel on the same queue which means that there has been at least one more ref added by a previous inet_frag_find call, but inet_frag_kill can delete the timer before inet_evict_bucket which will cause the WARN_ON() there to trigger since we'll have refcnt!=1. Now, this case is valid because the queue is being "killed" for some reason (removed from the chain list and its timer deleted) so it will get destroyed in the end by one of the inet_frag_put() calls which reaches 0 i.e. refcnt is still valid. CC: Florian Westphal CC: Eric Dumazet CC: Patrick McLean Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Reported-by: Patrick McLean Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 894ec30c5896..19419b60cb37 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -146,7 +146,6 @@ evict_again: atomic_inc(&fq->refcnt); spin_unlock(&hb->chain_lock); del_timer_sync(&fq->timer); - WARN_ON(atomic_read(&fq->refcnt) != 1); inet_frag_put(fq, f); goto evict_again; } -- cgit v1.2.3-58-ga151 From a22bb0b9b9b09b4cc711f6d577679773e074dde9 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Wed, 22 Oct 2014 15:29:24 +0000 Subject: e1000: unset IFF_UNICAST_FLT on WMware 82545EM VMWare's e1000 implementation does not seem to support unicast filtering. This can be observed by configuring a macvlan interface on eth0 in a VM in VMWare Fusion 5.0.5, and trying to use that interface instead of eth0. Tested on 3.16. Signed-off-by: Francesco Ruggeri Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 5f6aded512f5..24f3986cfae2 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1075,7 +1075,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_CSUM | NETIF_F_SG); - netdev->priv_flags |= IFF_UNICAST_FLT; + /* Do not set IFF_UNICAST_FLT for VMWare's 82545EM */ + if (hw->device_id != E1000_DEV_ID_82545EM_COPPER || + hw->subsystem_vendor_id != PCI_VENDOR_ID_VMWARE) + netdev->priv_flags |= IFF_UNICAST_FLT; adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); -- cgit v1.2.3-58-ga151 From bc16e47f03a7dce9ad68029b21519265c334eb12 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 23 Oct 2014 03:32:27 +0000 Subject: igb: don't reuse pages with pfmemalloc flag Incoming packet is dropped silently by sk_filter(), if the skb was allocated from pfmemalloc reserves and the corresponding socket is not marked with the SOCK_MEMALLOC flag. Igb driver allocates pages for DMA with __skb_alloc_page(), which calls alloc_pages_node() with the __GFP_MEMALLOC flag. So, in case of OOM condition, igb can get pages with pfmemalloc flag set. If an incoming packet hits the pfmemalloc page and is large enough (small packets are copying into the memory, allocated with netdev_alloc_skb_ip_align(), so they are not affected), it will be dropped. This behavior is ok under high memory pressure, but the problem is that the igb driver reuses these mapped pages. So, packets are still dropping even if all memory issues are gone and there is a plenty of free memory. In my case, some TCP sessions hang on a small percentage (< 0.1%) of machines days after OOMs. Fix this by avoiding reuse of such pages. Signed-off-by: Roman Gushchin Tested-by: Aaron Brown "aaron.f.brown@intel.com" Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a21b14495ebd..a2d72a87cbde 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6537,6 +6537,9 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, if (unlikely(page_to_nid(page) != numa_node_id())) return false; + if (unlikely(page->pfmemalloc)) + return false; + #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(page_count(page) != 1)) @@ -6603,7 +6606,8 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); /* we can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(page) == numa_node_id())) + if (likely((page_to_nid(page) == numa_node_id()) && + !page->pfmemalloc)) return true; /* this page cannot be reused so discard it */ -- cgit v1.2.3-58-ga151 From 4d2fcfbcf8141cdf70245a0c0612b8076f4b7e32 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Wed, 22 Oct 2014 15:29:03 +0000 Subject: ixgbe: need not repeat init skb with NULL Signed-off-by: Martin Zhang Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index fec5212d4337..d2df4e3d1032 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4321,8 +4321,8 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) IXGBE_CB(skb)->page_released = false; } dev_kfree_skb(skb); + rx_buffer->skb = NULL; } - rx_buffer->skb = NULL; if (rx_buffer->dma) dma_unmap_page(dev, rx_buffer->dma, ixgbe_rx_pg_size(rx_ring), -- cgit v1.2.3-58-ga151 From e3215f0ac77ec23b052cb0bf511143038ac2ad7b Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Tue, 28 Oct 2014 05:50:03 +0000 Subject: ixgbe: fix race when setting advertised speed Following commands: modprobe ixgbe ifconfig ethX up ethtool -s ethX advertise 0x020 can lead to "setup link failed with code -14" error due to the setup_link call racing with the SFP detection routine in the watchdog. This patch resolves this issue by protecting the setup_link call with check for __IXGBE_IN_SFP_INIT. Reported-by: Scott Harrison Signed-off-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 3ce4a258f945..0ae038b9af90 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -342,12 +342,16 @@ static int ixgbe_set_settings(struct net_device *netdev, if (old == advertised) return err; /* this sets the link speed and restarts auto-neg */ + while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) + usleep_range(1000, 2000); + hw->mac.autotry_restart = true; err = hw->mac.ops.setup_link(hw, advertised, true); if (err) { e_info(probe, "setup link failed with code %d\n", err); hw->mac.ops.setup_link(hw, old, true); } + clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); } else { /* in this case we currently only support 10Gb/FULL */ u32 speed = ethtool_cmd_speed(ecmd); -- cgit v1.2.3-58-ga151 From 664d6a792785cc677c2091038ce10322c8d04ae1 Mon Sep 17 00:00:00 2001 From: Cyril Brulebois Date: Tue, 28 Oct 2014 16:42:41 +0100 Subject: wireless: rt2x00: add new rt2800usb device 0x1b75 0xa200 AirLive WN-200USB wireless 11b/g/n dongle References: https://bugs.debian.org/766802 Reported-by: Martin Mokrejs Cc: stable@vger.kernel.org Signed-off-by: Cyril Brulebois Acked-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 573897b8e878..8444313eabe2 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -1111,6 +1111,7 @@ static struct usb_device_id rt2800usb_device_table[] = { /* Ovislink */ { USB_DEVICE(0x1b75, 0x3071) }, { USB_DEVICE(0x1b75, 0x3072) }, + { USB_DEVICE(0x1b75, 0xa200) }, /* Para */ { USB_DEVICE(0x20b8, 0x8888) }, /* Pegatron */ -- cgit v1.2.3-58-ga151 From a017ff755e43de9a3221d4ff4f03184ea7b93733 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Oct 2014 18:48:05 +0300 Subject: ath9k: fix some debugfs output The right shift operation has higher precedence than the mask so we left shift by "(i * 3)" and then immediately right shift by "(i * 3)" then we mask. It should be left shift, mask, and then right shift. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 46f20a309b5f..5c45e787814e 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -455,7 +455,7 @@ static ssize_t read_file_dma(struct file *file, char __user *user_buf, "%2d %2x %1x %2x %2x\n", i, (*qcuBase & (0x7 << qcuOffset)) >> qcuOffset, (*qcuBase & (0x8 << qcuOffset)) >> (qcuOffset + 3), - val[2] & (0x7 << (i * 3)) >> (i * 3), + (val[2] & (0x7 << (i * 3))) >> (i * 3), (*dcuBase & (0x1f << dcuOffset)) >> dcuOffset); } -- cgit v1.2.3-58-ga151 From 3a8fede115f12f7b90524d1ba4e709ce398ce8c6 Mon Sep 17 00:00:00 2001 From: Marc Yang Date: Wed, 29 Oct 2014 22:44:34 +0530 Subject: mwifiex: restart rxreorder timer correctly During 11n RX reordering, if there is a hole in RX table, driver will not send packets to kernel until the rxreorder timer expires or the table is full. However, currently driver always restarts rxreorder timer when receiving a packet, which causes the timer hardly to expire. So while connected with to 11n AP in a busy environment, ping packets may get blocked for about 30 seconds. This patch fixes this timer restarting by ensuring rxreorder timer would only be restarted either timer is not set or start_win has changed. Signed-off-by: Chin-Ran Lo Signed-off-by: Plus Chen Signed-off-by: Marc Yang Signed-off-by: Cathy Luo Signed-off-by: Avinash Patil Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/11n_rxreorder.c | 52 +++++++++++++++++++++------- drivers/net/wireless/mwifiex/11n_rxreorder.h | 2 ++ drivers/net/wireless/mwifiex/main.h | 1 + 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.c b/drivers/net/wireless/mwifiex/11n_rxreorder.c index 40057079ffb9..5ef5a0eeba50 100644 --- a/drivers/net/wireless/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/mwifiex/11n_rxreorder.c @@ -196,6 +196,7 @@ mwifiex_del_rx_reorder_entry(struct mwifiex_private *priv, mwifiex_11n_dispatch_pkt_until_start_win(priv, tbl, start_win); del_timer_sync(&tbl->timer_context.timer); + tbl->timer_context.timer_is_set = false; spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags); list_del(&tbl->list); @@ -297,6 +298,7 @@ mwifiex_flush_data(unsigned long context) (struct reorder_tmr_cnxt *) context; int start_win, seq_num; + ctx->timer_is_set = false; seq_num = mwifiex_11n_find_last_seq_num(ctx); if (seq_num < 0) @@ -385,6 +387,7 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta, new_node->timer_context.ptr = new_node; new_node->timer_context.priv = priv; + new_node->timer_context.timer_is_set = false; init_timer(&new_node->timer_context.timer); new_node->timer_context.timer.function = mwifiex_flush_data; @@ -399,6 +402,22 @@ mwifiex_11n_create_rx_reorder_tbl(struct mwifiex_private *priv, u8 *ta, spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, flags); } +static void +mwifiex_11n_rxreorder_timer_restart(struct mwifiex_rx_reorder_tbl *tbl) +{ + u32 min_flush_time; + + if (tbl->win_size >= MWIFIEX_BA_WIN_SIZE_32) + min_flush_time = MIN_FLUSH_TIMER_15_MS; + else + min_flush_time = MIN_FLUSH_TIMER_MS; + + mod_timer(&tbl->timer_context.timer, + jiffies + msecs_to_jiffies(min_flush_time * tbl->win_size)); + + tbl->timer_context.timer_is_set = true; +} + /* * This function prepares command for adding a BA request. * @@ -523,31 +542,31 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv, u8 *ta, u8 pkt_type, void *payload) { struct mwifiex_rx_reorder_tbl *tbl; - int start_win, end_win, win_size; + int prev_start_win, start_win, end_win, win_size; u16 pkt_index; bool init_window_shift = false; + int ret = 0; tbl = mwifiex_11n_get_rx_reorder_tbl(priv, tid, ta); if (!tbl) { if (pkt_type != PKT_TYPE_BAR) mwifiex_11n_dispatch_pkt(priv, payload); - return 0; + return ret; } if ((pkt_type == PKT_TYPE_AMSDU) && !tbl->amsdu) { mwifiex_11n_dispatch_pkt(priv, payload); - return 0; + return ret; } start_win = tbl->start_win; + prev_start_win = start_win; win_size = tbl->win_size; end_win = ((start_win + win_size) - 1) & (MAX_TID_VALUE - 1); if (tbl->flags & RXREOR_INIT_WINDOW_SHIFT) { init_window_shift = true; tbl->flags &= ~RXREOR_INIT_WINDOW_SHIFT; } - mod_timer(&tbl->timer_context.timer, - jiffies + msecs_to_jiffies(MIN_FLUSH_TIMER_MS * win_size)); if (tbl->flags & RXREOR_FORCE_NO_DROP) { dev_dbg(priv->adapter->dev, @@ -568,11 +587,14 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv, if ((start_win + TWOPOW11) > (MAX_TID_VALUE - 1)) { if (seq_num >= ((start_win + TWOPOW11) & (MAX_TID_VALUE - 1)) && - seq_num < start_win) - return -1; + seq_num < start_win) { + ret = -1; + goto done; + } } else if ((seq_num < start_win) || - (seq_num > (start_win + TWOPOW11))) { - return -1; + (seq_num >= (start_win + TWOPOW11))) { + ret = -1; + goto done; } } @@ -601,8 +623,10 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv, else pkt_index = (seq_num+MAX_TID_VALUE) - start_win; - if (tbl->rx_reorder_ptr[pkt_index]) - return -1; + if (tbl->rx_reorder_ptr[pkt_index]) { + ret = -1; + goto done; + } tbl->rx_reorder_ptr[pkt_index] = payload; } @@ -613,7 +637,11 @@ int mwifiex_11n_rx_reorder_pkt(struct mwifiex_private *priv, */ mwifiex_11n_scan_and_dispatch(priv, tbl); - return 0; +done: + if (!tbl->timer_context.timer_is_set || + prev_start_win != tbl->start_win) + mwifiex_11n_rxreorder_timer_restart(tbl); + return ret; } /* diff --git a/drivers/net/wireless/mwifiex/11n_rxreorder.h b/drivers/net/wireless/mwifiex/11n_rxreorder.h index 3a87bb0e3a62..63ecea89b4ab 100644 --- a/drivers/net/wireless/mwifiex/11n_rxreorder.h +++ b/drivers/net/wireless/mwifiex/11n_rxreorder.h @@ -21,6 +21,8 @@ #define _MWIFIEX_11N_RXREORDER_H_ #define MIN_FLUSH_TIMER_MS 50 +#define MIN_FLUSH_TIMER_15_MS 15 +#define MWIFIEX_BA_WIN_SIZE_32 32 #define PKT_TYPE_BAR 0xE7 #define MAX_TID_VALUE (2 << 11) diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index e2635747d966..f55658d15c60 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -592,6 +592,7 @@ struct reorder_tmr_cnxt { struct timer_list timer; struct mwifiex_rx_reorder_tbl *ptr; struct mwifiex_private *priv; + u8 timer_is_set; }; struct mwifiex_rx_reorder_tbl { -- cgit v1.2.3-58-ga151 From c0386f1584127442d0f2aea41bc948056d6b1337 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 29 Oct 2014 23:17:08 -0500 Subject: rtlwifi: rtl8192ce: rtl8192de: rtl8192se: Fix handling for missing get_btc_status The recent changes in checking for Bluetooth status added some callbacks to code in rtlwifi. To make certain that all callbacks are defined, a dummy routine has been added to rtlwifi, and the drivers that need to use it are modified. Signed-off-by: Larry Finger Cc: Murilo Opsfelder Araujo Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/core.c | 6 ++++++ drivers/net/wireless/rtlwifi/core.h | 1 + drivers/net/wireless/rtlwifi/rtl8192ce/sw.c | 1 + drivers/net/wireless/rtlwifi/rtl8192de/sw.c | 1 + drivers/net/wireless/rtlwifi/rtl8192se/sw.c | 1 + 5 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index f6179bc06086..07dae0d44abc 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1828,3 +1828,9 @@ const struct ieee80211_ops rtl_ops = { .flush = rtl_op_flush, }; EXPORT_SYMBOL_GPL(rtl_ops); + +bool rtl_btc_status_false(void) +{ + return false; +} +EXPORT_SYMBOL_GPL(rtl_btc_status_false); diff --git a/drivers/net/wireless/rtlwifi/core.h b/drivers/net/wireless/rtlwifi/core.h index 59cd3b9dca25..624e1dc16d31 100644 --- a/drivers/net/wireless/rtlwifi/core.h +++ b/drivers/net/wireless/rtlwifi/core.h @@ -42,5 +42,6 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr, u32 mask, u32 data); void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data); bool rtl_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb); +bool rtl_btc_status_false(void); #endif diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c index d86b5b566444..46ea07605eb4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c @@ -244,6 +244,7 @@ static struct rtl_hal_ops rtl8192ce_hal_ops = { .phy_lc_calibrate = _rtl92ce_phy_lc_calibrate, .phy_set_bw_mode_callback = rtl92ce_phy_set_bw_mode_callback, .dm_dynamic_txpower = rtl92ce_dm_dynamic_txpower, + .get_btc_status = rtl_btc_status_false, }; static struct rtl_mod_params rtl92ce_mod_params = { diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/rtlwifi/rtl8192de/sw.c index edab5a5351b5..a0aba088259a 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/sw.c @@ -251,6 +251,7 @@ static struct rtl_hal_ops rtl8192de_hal_ops = { .get_rfreg = rtl92d_phy_query_rf_reg, .set_rfreg = rtl92d_phy_set_rf_reg, .linked_set_reg = rtl92d_linked_set_reg, + .get_btc_status = rtl_btc_status_false, }; static struct rtl_mod_params rtl92de_mod_params = { diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index 1bff2a0f7600..5e16984b92ce 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -294,6 +294,7 @@ static struct rtl_hal_ops rtl8192se_hal_ops = { .set_bbreg = rtl92s_phy_set_bb_reg, .get_rfreg = rtl92s_phy_query_rf_reg, .set_rfreg = rtl92s_phy_set_rf_reg, + .get_btc_status = rtl_btc_status_false, }; static struct rtl_mod_params rtl92se_mod_params = { -- cgit v1.2.3-58-ga151 From 501479699ff484ba8acc1d07022271f00cfc55a3 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 29 Oct 2014 23:17:09 -0500 Subject: rtlwifi: rtl8192se: Fix duplicate calls to ieee80211_register_hw() Driver rtlwifi has been modified to call ieee80211_register_hw() from the probe routine; however, the existing call in the callback routine for deferred firmware loading was not removed. Signed-off-by: Larry Finger Cc: Murilo Opsfelder Araujo Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192se/sw.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index 5e16984b92ce..1fd22081149e 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -87,11 +87,8 @@ static void rtl92s_init_aspm_vars(struct ieee80211_hw *hw) static void rtl92se_fw_cb(const struct firmware *firmware, void *context) { struct ieee80211_hw *hw = context; - struct rtl_pci_priv *pcipriv = rtl_pcipriv(hw); struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rtl_pci *rtlpci = rtl_pcidev(pcipriv); struct rt_firmware *pfirmware = NULL; - int err; RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); @@ -112,20 +109,6 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context) memcpy(pfirmware->sz_fw_tmpbuffer, firmware->data, firmware->size); pfirmware->sz_fw_tmpbufferlen = firmware->size; release_firmware(firmware); - - err = ieee80211_register_hw(hw); - if (err) { - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "Can't register mac80211 hw\n"); - return; - } else { - rtlpriv->mac80211.mac80211_registered = 1; - } - rtlpci->irq_alloc = 1; - set_bit(RTL_STATUS_INTERFACE_START, &rtlpriv->status); - - /*init rfkill */ - rtl_init_rfkill(hw); } static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) -- cgit v1.2.3-58-ga151 From 30c5ccc6afee39754cff75ad8d775ad39a2ce989 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 29 Oct 2014 23:17:10 -0500 Subject: rtlwifi: rtl8192se: Add missing section to read descriptor setting The new version of rtlwifi needs code in rtl92se_get_desc() that returns the buffer address for read operations. Signed-off-by: Larry Finger Cc: Murilo Opsfelder Araujo Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192se/def.h | 2 ++ drivers/net/wireless/rtlwifi/rtl8192se/trx.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/def.h b/drivers/net/wireless/rtlwifi/rtl8192se/def.h index 83c98674bfd3..6e7a70b43949 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/def.h +++ b/drivers/net/wireless/rtlwifi/rtl8192se/def.h @@ -446,6 +446,8 @@ /* DWORD 6 */ #define SET_RX_STATUS__DESC_BUFF_ADDR(__pdesc, __val) \ SET_BITS_OFFSET_LE(__pdesc + 24, 0, 32, __val) +#define GET_RX_STATUS_DESC_BUFF_ADDR(__pdesc) \ + SHIFT_AND_MASK_LE(__pdesc + 24, 0, 32) #define SE_RX_HAL_IS_CCK_RATE(_pdesc)\ (GET_RX_STATUS_DESC_RX_MCS(_pdesc) == DESC92_RATE1M || \ diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c index b358ebce8942..672fd3b02835 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c @@ -640,6 +640,9 @@ u32 rtl92se_get_desc(u8 *desc, bool istx, u8 desc_name) case HW_DESC_RXPKT_LEN: ret = GET_RX_STATUS_DESC_PKT_LEN(desc); break; + case HW_DESC_RXBUFF_ADDR: + ret = GET_RX_STATUS_DESC_BUFF_ADDR(desc); + break; default: RT_ASSERT(false, "ERR rxdesc :%d not process\n", desc_name); -- cgit v1.2.3-58-ga151 From 8ae3c16e41b02db8ffe4121468519d6352baedc1 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 29 Oct 2014 23:17:11 -0500 Subject: rtlwifi: rtl8192ce: Add missing section to read descriptor setting The new version of rtlwifi needs code in rtl92ce_get_desc() that returns the buffer address for read operations. Signed-off-by: Larry Finger Cc: Murilo Opsfelder Araujo Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192ce/def.h | 2 ++ drivers/net/wireless/rtlwifi/rtl8192ce/trx.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/def.h b/drivers/net/wireless/rtlwifi/rtl8192ce/def.h index 831df101d7b7..9b660df6fd71 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/def.h +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/def.h @@ -114,6 +114,8 @@ LE_BITS_TO_4BYTE(((__pcmdfbhdr) + 4), 16, 4) #define GET_C2H_CMD_FEEDBACK_CCX_SEQ(__pcmdfbhdr) \ LE_BITS_TO_4BYTE(((__pcmdfbhdr) + 4), 20, 12) +#define GET_RX_STATUS_DESC_BUFF_ADDR(__pdesc) \ + SHIFT_AND_MASK_LE(__pdesc + 24, 0, 32) #define CHIP_VER_B BIT(4) #define CHIP_BONDING_IDENTIFIER(_value) (((_value) >> 22) & 0x3) diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c index 2fb9c7acb76a..dc3d20b17a26 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c @@ -728,6 +728,9 @@ u32 rtl92ce_get_desc(u8 *p_desc, bool istx, u8 desc_name) case HW_DESC_RXPKT_LEN: ret = GET_RX_DESC_PKT_LEN(pdesc); break; + case HW_DESC_RXBUFF_ADDR: + ret = GET_RX_STATUS_DESC_BUFF_ADDR(pdesc); + break; default: RT_ASSERT(false, "ERR rxdesc :%d not process\n", desc_name); -- cgit v1.2.3-58-ga151 From 75a916e1944fea8347d2245c62567187e4eff9dd Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 29 Oct 2014 23:17:13 -0500 Subject: rtlwifi: rtl8192se: Fix firmware loading An error in the code makes the allocated space for firmware to be too small. Signed-off-by: Larry Finger Cc: Murilo Opsfelder Araujo Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192se/sw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index 1fd22081149e..aadba29c167a 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -209,8 +209,8 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) if (!rtlpriv->rtlhal.pfirmware) return 1; - rtlpriv->max_fw_size = RTL8190_MAX_RAW_FIRMWARE_CODE_SIZE; - + rtlpriv->max_fw_size = RTL8190_MAX_FIRMWARE_CODE_SIZE*2 + + sizeof(struct fw_hdr); pr_info("Driver for Realtek RTL8192SE/RTL8191SE\n" "Loading firmware %s\n", rtlpriv->cfg->fw_name); /* request fw */ -- cgit v1.2.3-58-ga151 From 923e1ee3ff0b585cc4f56cf696c8455708537ffb Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 29 Oct 2014 11:12:15 +0800 Subject: r8152: clear SELECTIVE_SUSPEND when autoresuming The flag of SELECTIVE_SUSPEND should be cleared when autoresuming. Otherwise, when the system suspend and resume occur, it may have the wrong flow. Besides, because the flag of SELECTIVE_SUSPEND couldn't be used to check if the hw enables the relative feature, it should alwayes be disabled in close(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e3d84c322e4e..a17ca584182e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2955,10 +2955,7 @@ static int rtl8152_close(struct net_device *netdev) * be disable when autoresume occurs, because the * netif_running() would be false. */ - if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { - rtl_runtime_suspend_enable(tp, false); - clear_bit(SELECTIVE_SUSPEND, &tp->flags); - } + rtl_runtime_suspend_enable(tp, false); tasklet_disable(&tp->tl); tp->rtl_ops.down(tp); @@ -3253,6 +3250,8 @@ static int rtl8152_resume(struct usb_interface *intf) set_bit(WORK_ENABLE, &tp->flags); } usb_submit_urb(tp->intr_urb, GFP_KERNEL); + } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { + clear_bit(SELECTIVE_SUSPEND, &tp->flags); } mutex_unlock(&tp->control); -- cgit v1.2.3-58-ga151 From f4c7476b041d200c3b347f019eebf05e6d0b47f9 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 29 Oct 2014 11:12:16 +0800 Subject: r8152: reset tp->speed before autoresuming in open function If (tp->speed & LINK_STATUS) is not zero, the rtl8152_resume() would call rtl_start_rx() before enabling the tx/rx. Avoid this by resetting it to zero. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a17ca584182e..f2242316525e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2891,6 +2891,9 @@ static int rtl8152_open(struct net_device *netdev) if (res) goto out; + /* set speed to 0 to avoid autoresume try to submit rx */ + tp->speed = 0; + res = usb_autopm_get_interface(tp->intf); if (res < 0) { free_all_mem(tp); @@ -2904,6 +2907,8 @@ static int rtl8152_open(struct net_device *netdev) clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); + + /* disable the tx/rx, if the workqueue has enabled them. */ if (tp->speed & LINK_STATUS) tp->rtl_ops.disable(tp); } -- cgit v1.2.3-58-ga151 From e3bd1a81cd1e3f8ed961e642e97206d715db06c4 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 29 Oct 2014 11:12:17 +0800 Subject: r8152: check WORK_ENABLE in suspend function Avoid unnecessary behavior when autosuspend occurs during open(). The relative processes should only be run after finishing open(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f2242316525e..ca3c5d5f93eb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3207,7 +3207,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) netif_device_detach(netdev); } - if (netif_running(netdev)) { + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); tasklet_disable(&tp->tl); -- cgit v1.2.3-58-ga151 From e327c225c911529898ec300cb96d2088893de3df Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Wed, 29 Oct 2014 17:54:03 -0700 Subject: cxgb4 : Fix missing initialization of win0_lock win0_lock was being used un-initialized, resulting in warning traces being seen when lock debugging is enabled (and just wrong) Fixes : fc5ab0209650 ('cxgb4: Replaced the backdoor mechanism to access the HW memory with PCIe Window method') Signed-off-by: Anish Bhatt Signed-off-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 97683c1c5b69..8520d5529df8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6614,6 +6614,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->stats_lock); spin_lock_init(&adapter->tid_release_lock); + spin_lock_init(&adapter->win0_lock); INIT_WORK(&adapter->tid_release_task, process_tid_release_list); INIT_WORK(&adapter->db_full_task, process_db_full); -- cgit v1.2.3-58-ga151 From fa19c2b050ab5254326f5fc07096dd3c6a8d5d58 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 30 Oct 2014 10:09:53 +0100 Subject: ipv4: Do not cache routing failures due to disabled forwarding. If we cache them, the kernel will reuse them, independently of whether forwarding is enabled or not. Which means that if forwarding is disabled on the input interface where the first routing request comes from, then that unreachable result will be cached and reused for other interfaces, even if forwarding is enabled on them. The opposite is also true. This can be verified with two interfaces A and B and an output interface C, where B has forwarding enabled, but not A and trying ip route get $dst iif A from $src && ip route get $dst iif B from $src Signed-off-by: Nicolas Cavallari Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2d4ae469b471..6a2155b02602 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1798,6 +1798,7 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); res.type = RTN_UNREACHABLE; + res.fi = NULL; goto local_input; /* -- cgit v1.2.3-58-ga151 From a4f2dacbf2a5045e34b98a35d9a3857800f25a7b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 30 Oct 2014 15:59:27 +0200 Subject: net/mlx4_en: Don't attempt to TX offload the outer UDP checksum for VXLAN For VXLAN/NVGRE encapsulation, the current HW doesn't support offloading both the outer UDP TX checksum and the inner TCP/UDP TX checksum. The driver doesn't advertize SKB_GSO_UDP_TUNNEL_CSUM, however we are wrongly telling the HW to offload the outer UDP checksum for encapsulated packets, fix that. Fixes: 837052d0ccc5 ('net/mlx4_en: Add netdev support for TCP/IP offloads of vxlan tunneling') Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 34c137878545..454d9fea640e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -836,8 +836,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) * whether LSO is used */ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { - tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | - MLX4_WQE_CTRL_TCP_UDP_CSUM); + if (!skb->encapsulation) + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | + MLX4_WQE_CTRL_TCP_UDP_CSUM); + else + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); ring->tx_csum++; } -- cgit v1.2.3-58-ga151 From 571e1b2c7a4c2fd5faa1648462a6b65fa26530d7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 30 Oct 2014 15:59:28 +0200 Subject: mlx4: Avoid leaking steering rules on flow creation error flow If mlx4_ib_create_flow() attempts to create > 1 rules with the firmware, and one of these registrations fail, we leaked the already created flow rules. One example of the leak is when the registration of the VXLAN ghost steering rule fails, we didn't unregister the original rule requested by the user, introduced in commit d2fce8a9060d "mlx4: Set user-space raw Ethernet QPs to properly handle VXLAN traffic". While here, add dump of the VXLAN portion of steering rules so it can actually be seen when flow creation fails. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 10 ++++++++-- drivers/net/ethernet/mellanox/mlx4/mcg.c | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bda5994ceb68..8b72cf392b34 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1173,18 +1173,24 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], &mflow->reg_id[i]); if (err) - goto err_free; + goto err_create_flow; i++; } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { err = mlx4_ib_tunnel_steer_add(qp, flow_attr, &mflow->reg_id[i]); if (err) - goto err_free; + goto err_create_flow; + i++; } return &mflow->ibflow; +err_create_flow: + while (i) { + (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev, mflow->reg_id[i]); + i--; + } err_free: kfree(mflow); return ERR_PTR(err); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index ca0f98c95105..872843179f44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -955,6 +955,10 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str, cur->ib.dst_gid_msk); break; + case MLX4_NET_TRANS_RULE_ID_VXLAN: + len += snprintf(buf + len, BUF_SIZE - len, + "VNID = %d ", be32_to_cpu(cur->vxlan.vni)); + break; case MLX4_NET_TRANS_RULE_ID_IPV6: break; -- cgit v1.2.3-58-ga151 From 14051f0452a2c26a3f4791e6ad6a435e8f1945ff Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 30 Oct 2014 08:40:56 -0700 Subject: gre: Use inner mac length when computing tunnel length Currently, skb_inner_network_header is used but this does not account for Ethernet header for ETH_P_TEB. Use skb_inner_mac_header which handles TEB and also should work with IP encapsulation in which case inner mac and inner network headers are the same. Tested: Ran TCP_STREAM over GRE, worked as expected. Signed-off-by: Tom Herbert Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f6e345c0bc23..bb5947b0ce2d 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -47,7 +47,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); - ghl = skb_inner_network_header(skb) - skb_transport_header(skb); + ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); if (unlikely(ghl < sizeof(*greh))) goto out; -- cgit v1.2.3-58-ga151 From 39bb5e62867de82b269b07df900165029b928359 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Oct 2014 10:32:34 -0700 Subject: net: skb_fclone_busy() needs to detect orphaned skb Some drivers are unable to perform TX completions in a bound time. They instead call skb_orphan() Problem is skb_fclone_busy() has to detect this case, otherwise we block TCP retransmits and can freeze unlucky tcp sessions on mostly idle hosts. Signed-off-by: Eric Dumazet Fixes: 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues") Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++-- net/ipv4/tcp_output.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5884f95ff0e9..6c8b6f604e76 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -799,15 +799,19 @@ struct sk_buff_fclones { * @skb: buffer * * Returns true is skb is a fast clone, and its clone is not freed. + * Some drivers call skb_orphan() in their ndo_start_xmit(), + * so we also check that this didnt happen. */ -static inline bool skb_fclone_busy(const struct sk_buff *skb) +static inline bool skb_fclone_busy(const struct sock *sk, + const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - fclones->skb2.fclone == SKB_FCLONE_CLONE; + fclones->skb2.fclone == SKB_FCLONE_CLONE && + fclones->skb2.sk == sk; } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3af21296d967..a3d453b94747 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2126,7 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4c4e457e7888..88bf289abdc9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1962,7 +1962,7 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { kfree_skb(skb); return 0; } -- cgit v1.2.3-58-ga151 From 3d0ad09412ffe00c9afa201d01effdb6023d09b4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:12 +0000 Subject: drivers/net: Disable UFO through virtio IPv6 does not allow fragmentation by routers, so there is no fragmentation ID in the fixed header. UFO for IPv6 requires the ID to be passed separately, but there is no provision for this in the virtio net protocol. Until recently our software implementation of UFO/IPv6 generated a new ID, but this was a bug. Now we will use ID=0 for any UFO/IPv6 packet passed through a tap, which is even worse. Unfortunately there is no distinction between UFO/IPv4 and v6 features, so disable UFO on taps and virtio_net completely until we have a proper solution. We cannot depend on VM managers respecting the tap feature flags, so keep accepting UFO packets but log a warning the first time we do this. Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 13 +++++-------- drivers/net/tun.c | 19 +++++++++++-------- drivers/net/virtio_net.c | 24 ++++++++++++++---------- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 65e2892342bd..2aeaa61ece09 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -65,7 +65,7 @@ static struct cdev macvtap_cdev; static const struct proto_ops macvtap_socket_ops; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6 | NETIF_F_UFO) + NETIF_F_TSO6) #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG) @@ -569,6 +569,8 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: + pr_warn_once("macvtap: %s: using disabled UFO feature; please fix this program\n", + current->comm); gso_type = SKB_GSO_UDP; break; default: @@ -614,8 +616,6 @@ static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (sinfo->gso_type & SKB_GSO_TCP_ECN) @@ -950,9 +950,6 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } - - if (arg & TUN_F_UFO) - feature_mask |= NETIF_F_UFO; } /* tun/tap driver inverts the usage for TSO offloads, where @@ -963,7 +960,7 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ - if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO)) + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; @@ -1064,7 +1061,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN | TUN_F_UFO)) + TUN_F_TSO_ECN)) return -EINVAL; rtnl_lock(); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 186ce541c657..280d3d2a9792 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -174,7 +174,7 @@ struct tun_struct { struct net_device *dev; netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ - NETIF_F_TSO6|NETIF_F_UFO) + NETIF_F_TSO6) int vnet_hdr_sz; int sndbuf; @@ -1149,8 +1149,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: + { + static bool warned; + + if (!warned) { + warned = true; + netdev_warn(tun->dev, + "%s: using disabled UFO feature; please fix this program\n", + current->comm); + } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; + } default: tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -1251,8 +1261,6 @@ static ssize_t tun_put_user(struct tun_struct *tun, gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - gso.gso_type = VIRTIO_NET_HDR_GSO_UDP; else { pr_err("unexpected GSO type: " "0x%x, gso_size %d, hdr_len %d\n", @@ -1762,11 +1770,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } - - if (arg & TUN_F_UFO) { - features |= NETIF_F_UFO; - arg &= ~TUN_F_UFO; - } } /* This gives the user a way to test for new features in future by diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d75256bd1a6a..ec2a8b41ed41 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -491,8 +491,17 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_UDP: + { + static bool warned; + + if (!warned) { + warned = true; + netdev_warn(dev, + "host using disabled UFO feature; please fix it\n"); + } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; + } case VIRTIO_NET_HDR_GSO_TCPV6: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; @@ -881,8 +890,6 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) @@ -1705,7 +1712,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { - dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO + dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6; } /* Individual feature bits: what can host handle? */ @@ -1715,11 +1722,9 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; - if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO)) - dev->hw_features |= NETIF_F_UFO; if (gso) - dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO); + dev->features |= dev->hw_features & NETIF_F_ALL_TSO; /* (!csum && gso) case will be fixed by register_netdev() */ } if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) @@ -1757,8 +1762,7 @@ static int virtnet_probe(struct virtio_device *vdev) /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO)) + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) vi->big_packets = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) @@ -1952,9 +1956,9 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, - VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, -- cgit v1.2.3-58-ga151 From 5188cd44c55db3e92cd9e77a40b5baa7ed4340f7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:17 +0000 Subject: drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets UFO is now disabled on all drivers that work with virtio net headers, but userland may try to send UFO/IPv6 packets anyway. Instead of sending with ID=0, we should select identifiers on their behalf (as we used to). Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 3 +++ drivers/net/tun.c | 6 +++++- include/net/ipv6.h | 2 ++ net/ipv6/output_core.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 2aeaa61ece09..6f226de655a4 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -572,6 +573,8 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, pr_warn_once("macvtap: %s: using disabled UFO feature; please fix this program\n", current->comm); gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: return -EINVAL; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 280d3d2a9792..7302398f0b1f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -1139,6 +1140,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; } + skb_reset_network_header(skb); + if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -1159,6 +1162,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, current->comm); } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; } default: @@ -1189,7 +1194,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } - skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); rxhash = skb_get_hash(skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 97f472012438..4292929392b0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,6 +671,8 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +void ipv6_proxy_select_ident(struct sk_buff *skb); + int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index fc24c390af05..97f41a3e68d9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -3,11 +3,45 @@ * not configured or static. These functions are needed by GSO/GRO implementation. */ #include +#include #include #include #include #include +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. + */ +void ipv6_proxy_select_ident(struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); -- cgit v1.2.3-58-ga151 From de11b0e8c569b96c2cf6a811e3805b7aeef498a3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 31 Oct 2014 03:10:31 +0000 Subject: drivers/net: macvtap and tun depend on INET These drivers now call ipv6_proxy_select_ident(), which is defined only if CONFIG_INET is enabled. However, they have really depended on CONFIG_INET for as long as they have allowed sending GSO packets from userland. Reported-by: kbuild test robot Signed-off-by: Ben Hutchings Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Fixes: b9fb9ee07e67 ("macvtap: add GSO/csum offload support") Fixes: 5188cd44c55d ("drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets") Signed-off-by: David S. Miller --- drivers/net/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 4706386b7d34..f9009be3f307 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVLAN config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN + depends on INET help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -200,6 +201,7 @@ config RIONET_RX_SIZE config TUN tristate "Universal TUN/TAP device driver support" + depends on INET select CRC32 ---help--- TUN/TAP provides packet reception and transmission for user space -- cgit v1.2.3-58-ga151 From 4d87716cd057bde3f90e304289c1fec88d45a1cc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 12:25:06 +0200 Subject: netfilter: nf_tables_bridge: update hook_mask to allow {pre,post}routing Fixes: 36d2af5 ("netfilter: nf_tables: allow to filter from prerouting and postrouting") Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index da17a5eab8b4..074c557ab505 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -75,9 +75,11 @@ static const struct nf_chain_type filter_bridge = { .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, .owner = THIS_MODULE, - .hook_mask = (1 << NF_BR_LOCAL_IN) | + .hook_mask = (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | - (1 << NF_BR_LOCAL_OUT), + (1 << NF_BR_LOCAL_OUT) | + (1 << NF_BR_POST_ROUTING), }; static int __init nf_tables_bridge_init(void) -- cgit v1.2.3-58-ga151 From 052b9498eea532deb5de75277a53f6e0623215dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:24:57 +0200 Subject: netfilter: nf_reject_ipv4: split nf_send_reset() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_iphdr_put(): to build the IPv4 header. * nf_reject_ip_tcphdr_put(): to build the TCP header. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 10 ++++ net/ipv4/netfilter/nf_reject_ipv4.c | 88 ++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 26 deletions(-) diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index e8427193c777..03e928a55229 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -1,6 +1,8 @@ #ifndef _IPV4_NF_REJECT_H #define _IPV4_NF_REJECT_H +#include +#include #include static inline void nf_send_unreach(struct sk_buff *skb_in, int code) @@ -10,4 +12,12 @@ static inline void nf_send_unreach(struct sk_buff *skb_in, int code) void nf_send_reset(struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl); +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth); + #endif /* _IPV4_NF_REJECT_H */ diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 92b303dbd5fc..1baaa83dfe5c 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -12,43 +12,39 @@ #include #include #include +#include -/* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) { - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; + return NULL; oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); + sizeof(struct tcphdr), _oth); if (oth == NULL) - return; + return NULL; /* No RST for RST. */ if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; + return NULL; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); + return NULL; - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); - skb_reserve(nskb, LL_MAX_HEADER); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); skb_reset_network_header(nskb); niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); @@ -57,10 +53,23 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) niph->tos = 0; niph->id = 0; niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; + niph->protocol = protocol; niph->check = 0; niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); @@ -69,9 +78,9 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; - if (oth->ack) + if (oth->ack) { tcph->seq = oth->ack_seq; - else { + } else { tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - ip_hdrlen(oldskb) - (oth->doff << 2)); @@ -84,16 +93,43 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +/* Send RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); - nskb->protocol = htons(ETH_P_IP); + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; -- cgit v1.2.3-58-ga151 From 8bfcdf6671b1c8006c52c3eaf9fd1b5dfcf41c3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Oct 2014 12:35:54 +0100 Subject: netfilter: nf_reject_ipv6: split nf_send_reset6() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip6_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_ip6hdr_put(): to build the IPv6 header. * nf_reject_ip6_tcphdr_put(): to build the TCP header. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_reject.h | 10 ++ net/ipv6/netfilter/nf_reject_ipv6.c | 175 ++++++++++++++++++++------------- 2 files changed, 119 insertions(+), 66 deletions(-) diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 48e18810a9be..23216d48abf9 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -15,4 +15,14 @@ nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit); +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen); + #endif /* _IPV6_NF_REJECT_H */ diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 20d9defc6c59..015eb8a80766 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,116 +12,102 @@ #include #include #include +#include -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook) { - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; u8 proto; __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } + int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { pr_debug("Cannot get TCP header.\n"); - return; + return NULL; } - otcplen = oldskb->len - tcphoff; + *otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; + if (proto != IPPROTO_TCP || *otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP or too short (len = %d)\n", + proto, *otcplen); + return NULL; } - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); + otcph = skb_header_pointer(oldskb, tcphoff, sizeof(struct tcphdr), + otcph); + if (otcph == NULL) + return NULL; /* No RST for RST. */ - if (otcph.rst) { + if (otcph->rst) { pr_debug("RST is set\n"); - return; + return NULL; } /* Check checksum. */ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; + return NULL; } - skb_dst_set(nskb, dst); + return otcph; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); - skb_reserve(nskb, hh_len + dst->header_len); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit) +{ + struct ipv6hdr *ip6h; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = hoplimit; + ip6h->nexthdr = protocol; ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + nskb->protocol = htons(ETH_P_IPV6); + + return ip6h; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put); + +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen) +{ + struct tcphdr *tcph; + int needs_ack; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; + tcph->source = oth->dest; + tcph->dest = oth->source; - if (otcph.ack) { + if (oth->ack) { needs_ack = 0; - tcph->seq = otcph.ack_seq; + tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + otcplen - (oth->doff<<2)); tcph->seq = 0; } @@ -139,6 +125,63 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) sizeof(struct tcphdr), IPPROTO_TCP, csum_partial(tcph, sizeof(struct tcphdr), 0)); +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr _otcph; + const struct tcphdr *otcph; + unsigned int otcplen, hh_len; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; + struct dst_entry *dst = NULL; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + otcph = nf_reject_ip6_tcphdr_get(oldskb, &_otcph, &otcplen, hook); + if (!otcph) + return; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph->dest; + fl6.fl6_dport = otcph->source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + ip6_dst_hoplimit(dst)); + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); -- cgit v1.2.3-58-ga151 From 523b929d5446c023e1219aa81455a8c766cac883 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:40:26 +0200 Subject: netfilter: nft_reject_bridge: don't use IP stack to reject traffic If the packet is received via the bridge stack, this cannot reject packets from the IP stack. This adds functions to build the reject packet and send it from the bridge stack. Comments and assumptions on this patch: 1) Validate the IPv4 and IPv6 headers before further processing, given that the packet comes from the bridge stack, we cannot assume they are clean. Truncated packets are dropped, we follow similar approach in the existing iptables match/target extensions that need to inspect layer 4 headers that is not available. This also includes packets that are directed to multicast and broadcast ethernet addresses. 2) br_deliver() is exported to inject the reject packet via bridge localout -> postrouting. So the approach is similar to what we already do in the iptables reject target. The reject packet is sent to the bridge port from which we have received the original packet. 3) The reject packet is forged based on the original packet. The TTL is set based on sysctl_ip_default_ttl for IPv4 and per-net ipv6.devconf_all hoplimit for IPv6. Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_forward.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 263 +++++++++++++++++++++++++++++-- 2 files changed, 254 insertions(+), 10 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 992ec49a96aa..44cb786b925a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -112,6 +112,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index a76479535df2..31b27e1bab9f 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -16,6 +16,237 @@ #include #include #include +#include +#include +#include "../br_private.h" + +static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, + struct sk_buff *nskb) +{ + struct ethhdr *eth; + + eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN); + skb_reset_mac_header(nskb); + ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest); + ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); + eth->h_proto = eth_hdr(oldskb)->h_proto; + skb_pull(nskb, ETH_HLEN); +} + +static int nft_reject_iphdr_validate(struct sk_buff *oldskb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(oldskb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(oldskb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (oldskb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(oldskb, iph->ihl*4)) + return 0; + + return 1; +} + +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->ttl = sysctl_ip_default_ttl; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + void *payload; + __wsum csum; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); + memset(icmph, 0, sizeof(*icmph)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static int nft_reject_ip6hdr_validate(struct sk_buff *oldskb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(oldskb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(oldskb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > oldskb->len) + return 0; + + return 1; +} + +static void nft_reject_br_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + void *payload; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr)); + memset(icmp6h, 0, sizeof(*icmp6h)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} static void nft_reject_bridge_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], @@ -23,35 +254,46 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; switch (eth_hdr(pkt->skb)->h_proto) { case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v4_tcp_reset(pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + nft_reject_icmp_code(priv->icmp_code)); break; } break; case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, - nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + nft_reject_icmpv6_code(priv->icmp_code)); break; } break; @@ -59,6 +301,7 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, /* No explicit way to reject this protocol, drop it. */ break; } +out: data[NFT_REG_VERDICT].verdict = NF_DROP; } -- cgit v1.2.3-58-ga151 From 127917c29a432c3b798e014a1714e9c1af0f87fe Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Oct 2014 14:08:17 +0100 Subject: netfilter: nft_reject_bridge: restrict reject to prerouting and input Restrict the reject expression to the prerouting and input bridge hooks. If we allow this to be used from forward or any other later bridge hook, if the frame is flooded to several ports, we'll end up sending several reject packets, one per cloned packet. Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 33 +++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 31b27e1bab9f..654c9018e3e7 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -305,12 +306,34 @@ out: data[NFT_REG_VERDICT].verdict = NF_DROP; } +static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + switch (basechain->ops[0].hooknum) { + case NF_BR_PRE_ROUTING: + case NF_BR_LOCAL_IN: + break; + default: + return -EOPNOTSUPP; + } + } + return 0; +} + static int nft_reject_bridge_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; + int icmp_code, err; + + err = nft_reject_bridge_validate_hooks(ctx->chain); + if (err < 0) + return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; @@ -359,6 +382,13 @@ nla_put_failure: return -1; } +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_reject_bridge_validate_hooks(ctx->chain); +} + static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, @@ -366,6 +396,7 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .eval = nft_reject_bridge_eval, .init = nft_reject_bridge_init, .dump = nft_reject_bridge_dump, + .validate = nft_reject_bridge_validate, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { -- cgit v1.2.3-58-ga151 From d59c876dd61f3c151db077f9d73774e605f2b35e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Fri, 31 Oct 2014 13:35:57 +0800 Subject: r8152: stop submitting intr for -EPROTO For Renesas USB 3.0 host controller, when unplugging the usb hub which has the RTL8153 plugged, the driver would get -EPROTO for interrupt transfer. There is high probability to get the information of "HC died; cleaning up", if the driver continues to submit the interrupt transfer before the disconnect() is called. [ 1024.197678] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.213673] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.229668] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.245661] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.261653] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.277648] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.293642] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.309638] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.325633] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.341627] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.357621] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.373615] r8152 9-1.4:1.0 eth0: intr status -71 [ 1024.383097] usb 9-1: USB disconnect, device number 2 [ 1024.383103] usb 9-1.4: USB disconnect, device number 6 [ 1029.391010] xhci_hcd 0000:04:00.0: xHCI host not responding to stop endpoint command. [ 1029.391016] xhci_hcd 0000:04:00.0: Assuming host is dying, halting host. [ 1029.392551] xhci_hcd 0000:04:00.0: HC died; cleaning up [ 1029.421480] usb 8-1: USB disconnect, device number 2 Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ca3c5d5f93eb..c6554c7a8147 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1162,6 +1162,9 @@ static void intr_callback(struct urb *urb) case -ESHUTDOWN: netif_device_detach(tp->netdev); case -ENOENT: + case -EPROTO: + netif_info(tp, intr, tp->netdev, + "Stop submitting intr, status %d\n", status); return; case -EOVERFLOW: netif_info(tp, intr, tp->netdev, "intr status -EOVERFLOW\n"); -- cgit v1.2.3-58-ga151 From f7065f4bd3fe4ad6bf7e49ba7c68baa2c7046146 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 Oct 2014 00:49:57 -0700 Subject: mpls: Fix mpls_gso handler. mpls gso handler needs to pull skb after segmenting skb. CC: Simon Horman Signed-off-by: Pravin B Shelar Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/mpls/mpls_gso.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index f0f5309a2d72..e3545f21a099 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -59,8 +59,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * above pulled. It will be re-pushed after returning * skb_mac_gso_segment(), an indirect caller of this function. */ - __skb_push(skb, skb->data - skb_mac_header(skb)); - + __skb_pull(skb, skb->data - skb_mac_header(skb)); out: return segs; } -- cgit v1.2.3-58-ga151 From de05c400f7dfa566f598140f8604a5de8067cd5f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 Oct 2014 00:50:04 -0700 Subject: mpls: Allow mpls_gso to be built as module Kconfig already allows mpls to be built as module. Following patch fixes Makefile to do same. CC: Simon Horman Signed-off-by: Pravin B Shelar Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/mpls/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 0a3c171be537..6dec088c2d0f 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -1,4 +1,4 @@ # # Makefile for MPLS. # -obj-y += mpls_gso.o +obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o -- cgit v1.2.3-58-ga151 From 7d2911c4381555b31ef0bcae42a0dbf9ade7426e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 30 Oct 2014 09:59:27 -0700 Subject: net: smc91x: Fix gpios for device tree based booting With legacy booting, the platform init code was taking care of the configuring of GPIOs. With device tree based booting, things may or may not work depending what bootloader has configured or if the legacy platform code gets called. Let's add support for the pwrdn and reset GPIOs to the smc91x driver to fix the issues of smc91x not working properly when booted in device tree mode. And let's change n900 to use these settings as some versions of the bootloader do not configure things properly causing errors. Reported-by: Kevin Hilman Signed-off-by: Tony Lindgren Signed-off-by: David S. Miller --- .../devicetree/bindings/net/smsc-lan91c111.txt | 2 + arch/arm/boot/dts/omap3-n900.dts | 2 + arch/arm/mach-omap2/pdata-quirks.c | 3 -- drivers/net/ethernet/smsc/smc91x.c | 58 ++++++++++++++++++++++ drivers/net/ethernet/smsc/smc91x.h | 3 ++ 5 files changed, 65 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt index 0f8487b88822..e77e167593db 100644 --- a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt +++ b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt @@ -11,3 +11,5 @@ Optional properties: are supported on the device. Valid value for SMSC LAN91c111 are 1, 2 or 4. If it's omitted or invalid, the size would be 2 meaning 16-bit access only. +- power-gpios: GPIO to control the PWRDWN pin +- reset-gpios: GPIO to control the RESET pin diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 9b0494a8ab45..286bbf620c77 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -665,6 +665,8 @@ bank-width = <2>; pinctrl-names = "default"; pinctrl-0 = <ðernet_pins>; + power-gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>; /* gpio86 */ + reset-gpios = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* gpio164 */ gpmc,device-width = <2>; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index c95346c94829..cec9d6c6442c 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -252,9 +252,6 @@ static void __init nokia_n900_legacy_init(void) platform_device_register(&omap3_rom_rng_device); } - - /* Only on some development boards */ - gpio_request_one(164, GPIOF_OUT_INIT_LOW, "smc91x reset"); } static void __init omap3_tao3530_legacy_init(void) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 5e94d00b96b3..2c62208077fe 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -81,6 +81,7 @@ static const char version[] = #include #include #include +#include #include #include @@ -2188,6 +2189,41 @@ static const struct of_device_id smc91x_match[] = { {}, }; MODULE_DEVICE_TABLE(of, smc91x_match); + +/** + * of_try_set_control_gpio - configure a gpio if it exists + */ +static int try_toggle_control_gpio(struct device *dev, + struct gpio_desc **desc, + const char *name, int index, + int value, unsigned int nsdelay) +{ + struct gpio_desc *gpio = *desc; + int res; + + gpio = devm_gpiod_get_index(dev, name, index); + if (IS_ERR(gpio)) { + if (PTR_ERR(gpio) == -ENOENT) { + *desc = NULL; + return 0; + } + + return PTR_ERR(gpio); + } + res = gpiod_direction_output(gpio, !value); + if (res) { + dev_err(dev, "unable to toggle gpio %s: %i\n", name, res); + devm_gpiod_put(dev, gpio); + gpio = NULL; + return res; + } + if (nsdelay) + usleep_range(nsdelay, 2 * nsdelay); + gpiod_set_value_cansleep(gpio, value); + *desc = gpio; + + return 0; +} #endif /* @@ -2237,6 +2273,28 @@ static int smc_drv_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; u32 val; + /* Optional pwrdwn GPIO configured? */ + ret = try_toggle_control_gpio(&pdev->dev, &lp->power_gpio, + "power", 0, 0, 100); + if (ret) + return ret; + + /* + * Optional reset GPIO configured? Minimum 100 ns reset needed + * according to LAN91C96 datasheet page 14. + */ + ret = try_toggle_control_gpio(&pdev->dev, &lp->reset_gpio, + "reset", 0, 0, 100); + if (ret) + return ret; + + /* + * Need to wait for optional EEPROM to load, max 750 us according + * to LAN91C96 datasheet page 55. + */ + if (lp->reset_gpio) + usleep_range(750, 1000); + /* Combination of IO widths supported, default to 16-bit */ if (!of_property_read_u32(np, "reg-io-width", &val)) { if (val & 1) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 47dce918eb0f..2a38dacbbd27 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -298,6 +298,9 @@ struct smc_local { struct sk_buff *pending_tx_skb; struct tasklet_struct tx_task; + struct gpio_desc *power_gpio; + struct gpio_desc *reset_gpio; + /* version/revision of the SMC91x chip */ int version; -- cgit v1.2.3-58-ga151 From 1e19e084eae727654052339757ab7f1eaff58bad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 Oct 2014 18:28:03 +0200 Subject: stmmac: pci: set default of the filter bins The commit 3b57de958e2a brought the support for a different amount of the filter bins, but didn't update the PCI driver accordingly. This patch appends the default values when the device is enumerated via PCI bus. Fixes: 3b57de958e2a (net: stmmac: Support devicetree configs for mcast and ucast filter entries) Signed-off-by: Andy Shevchenko Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 655a23bbc451..e17a970eaf2b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -33,6 +33,7 @@ static struct stmmac_dma_cfg dma_cfg; static void stmmac_default_data(void) { memset(&plat_dat, 0, sizeof(struct plat_stmmacenet_data)); + plat_dat.bus_id = 1; plat_dat.phy_addr = 0; plat_dat.interface = PHY_INTERFACE_MODE_GMII; @@ -47,6 +48,12 @@ static void stmmac_default_data(void) dma_cfg.pbl = 32; dma_cfg.burst_len = DMA_AXI_BLEN_256; plat_dat.dma_cfg = &dma_cfg; + + /* Set default value for multicast hash bins */ + plat_dat.multicast_filter_bins = HASH_TABLE_SIZE; + + /* Set default value for unicast filter entries */ + plat_dat.unicast_filter_entries = 1; } /** -- cgit v1.2.3-58-ga151 From e0fb6fb6d52686134b2ece144060219591d4f8d3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 30 Oct 2014 20:50:15 -0700 Subject: net: ethtool: Return -EOPNOTSUPP if user space tries to read EEPROM with lengh 0 If a driver supports reading EEPROM but no EEPROM is installed in the system, the driver's get_eeprom_len function returns 0. ethtool will subsequently try to read that zero-length EEPROM anyway. If the driver does not support EEPROM access at all, this operation will return -EOPNOTSUPP. If the driver does support EEPROM access but no EEPROM is installed, the operation will return -EINVAL. Return -EOPNOTSUPP in both cases for consistency. Signed-off-by: Guenter Roeck Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- net/core/ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1600aa24d36b..06dfb293e5aa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1036,7 +1036,8 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { const struct ethtool_ops *ops = dev->ethtool_ops; - if (!ops->get_eeprom || !ops->get_eeprom_len) + if (!ops->get_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, @@ -1052,7 +1053,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) u8 *data; int ret = 0; - if (!ops->set_eeprom || !ops->get_eeprom_len) + if (!ops->set_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) -- cgit v1.2.3-58-ga151 From 6f979eb3fcfb4c3f42f230d174db4bbad0080710 Mon Sep 17 00:00:00 2001 From: Lennart Sorensen Date: Fri, 31 Oct 2014 13:28:54 -0400 Subject: drivers: net: cpsw: Fix broken loop condition in switch mode 0d961b3b52f566f823070ce2366511a7f64b928c (drivers: net: cpsw: fix buggy loop condition) accidentally fixed a loop comparison in too many places while fixing a real bug. It was correct to fix the dual_emac mode section since there 'i' is used as an index into priv->slaves which is a 0 based array. However the other two changes (which are only used in switch mode) are wrong since there 'i' is actually the ALE port number, and port 0 is the host port, while port 1 and up are the slave ports. Putting the loop condition back in the switch mode section fixes it. A comment has been added to point out the intent clearly to avoid future confusion. Also a comment is fixed that said the opposite of what was actually happening. Signed-off-by: Len Sorensen Acked-by: Heiko Schocher Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index d81b84b5e3df..fd4577d30c5d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -591,8 +591,8 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) if (enable) { unsigned long timeout = jiffies + HZ; - /* Disable Learn for all ports */ - for (i = 0; i < priv->data.slaves; i++) { + /* Disable Learn for all ports (host is port 0 and slaves are port 1 and up */ + for (i = 0; i <= priv->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 1); cpsw_ale_control_set(ale, i, @@ -616,11 +616,11 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); dev_dbg(&ndev->dev, "promiscuity enabled\n"); } else { - /* Flood All Unicast Packets to Host port */ + /* Don't Flood All Unicast Packets to Host port */ cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); - /* Enable Learn for all ports */ - for (i = 0; i < priv->data.slaves; i++) { + /* Enable Learn for all ports (host is port 0 and slaves are port 1 and up */ + for (i = 0; i <= priv->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 0); cpsw_ale_control_set(ale, i, -- cgit v1.2.3-58-ga151 From 1e5c4bc497c0a96e1ad2974539d353870f2cb0b6 Mon Sep 17 00:00:00 2001 From: Lennart Sorensen Date: Fri, 31 Oct 2014 13:38:52 -0400 Subject: drivers: net: cpsw: Support ALLMULTI and fix IFF_PROMISC in switch mode The cpsw driver did not support the IFF_ALLMULTI flag which makes dynamic multicast routing not work. Related to this, when enabling IFF_PROMISC in switch mode, all registered multicast addresses are flushed, resulting in only broadcast and unicast traffic being received. A new cpsw_ale_set_allmulti function now scans through the ALE entry table and adds/removes the host port from the unregistered multicast port mask of each vlan entry depending on the state of IFF_ALLMULTI. In promiscious mode, cpsw_ale_set_allmulti is used to force reception of all multicast traffic in addition to the unicast and broadcast traffic. With this change dynamic multicast and promiscious mode both work in switch mode. Signed-off-by: Len Sorensen Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 20 ++++++++++++++++++-- drivers/net/ethernet/ti/cpsw_ale.c | 29 +++++++++++++++++++++++++++++ drivers/net/ethernet/ti/cpsw_ale.h | 2 ++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index fd4577d30c5d..d8794488f80a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -638,12 +638,16 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); + cpsw_ale_set_allmulti(priv->ale, IFF_ALLMULTI); return; } else { /* Disable promiscuous mode */ cpsw_set_promiscious(ndev, false); } + /* Restore allmulti on vlans if necessary */ + cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI); + /* Clear all mcast from ALE */ cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port); @@ -1149,6 +1153,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) const int port = priv->host_port; u32 reg; int i; + int unreg_mcast_mask; reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : CPSW2_PORT_VLAN; @@ -1158,9 +1163,14 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) for (i = 0; i < priv->data.slaves; i++) slave_write(priv->slaves + i, vlan, reg); + if (priv->ndev->flags & IFF_ALLMULTI) + unreg_mcast_mask = ALE_ALL_PORTS; + else + unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2; + cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS << port, ALE_ALL_PORTS << port, ALE_ALL_PORTS << port, - (ALE_PORT_1 | ALE_PORT_2) << port); + unreg_mcast_mask << port); } static void cpsw_init_host_port(struct cpsw_priv *priv) @@ -1620,11 +1630,17 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv, unsigned short vid) { int ret; + int unreg_mcast_mask; + + if (priv->ndev->flags & IFF_ALLMULTI) + unreg_mcast_mask = ALE_ALL_PORTS; + else + unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2; ret = cpsw_ale_add_vlan(priv->ale, vid, ALE_ALL_PORTS << priv->host_port, 0, ALE_ALL_PORTS << priv->host_port, - (ALE_PORT_1 | ALE_PORT_2) << priv->host_port); + unreg_mcast_mask << priv->host_port); if (ret != 0) return ret; diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 0579b2243bb6..3ae83879a75f 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -443,6 +443,35 @@ int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) return 0; } +void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti) +{ + u32 ale_entry[ALE_ENTRY_WORDS]; + int type, idx; + int unreg_mcast = 0; + + /* Only bother doing the work if the setting is actually changing */ + if (ale->allmulti == allmulti) + return; + + /* Remember the new setting to check against next time */ + ale->allmulti = allmulti; + + for (idx = 0; idx < ale->params.ale_entries; idx++) { + cpsw_ale_read(ale, idx, ale_entry); + type = cpsw_ale_get_entry_type(ale_entry); + if (type != ALE_TYPE_VLAN) + continue; + + unreg_mcast = cpsw_ale_get_vlan_unreg_mcast(ale_entry); + if (allmulti) + unreg_mcast |= 1; + else + unreg_mcast &= ~1; + cpsw_ale_set_vlan_unreg_mcast(ale_entry, unreg_mcast); + cpsw_ale_write(ale, idx, ale_entry); + } +} + struct ale_control_info { const char *name; int offset, port_offset; diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 31cf43cab42e..c0d4127aa549 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -27,6 +27,7 @@ struct cpsw_ale { struct cpsw_ale_params params; struct timer_list timer; unsigned long ageout; + int allmulti; }; enum cpsw_ale_control { @@ -103,6 +104,7 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask, int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag, int reg_mcast, int unreg_mcast); int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port); +void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti); int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control); int cpsw_ale_control_set(struct cpsw_ale *ale, int port, -- cgit v1.2.3-58-ga151