summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-03-30 14:43:03 -0700
committerJakub Kicinski <kuba@kernel.org>2023-03-30 14:43:03 -0700
commit79548b7984e4c606c6caaad72a0864a83855ebc9 (patch)
tree05e1be823acbae8bbcd155a706eb9e4f84eee047 /kernel
parentda617cd8d90608582eb8d0b58026f31f1a9bfb1d (diff)
parentb2bc47e9b2011a183f9d3d3454a294a938082fb9 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Conflicts: drivers/net/ethernet/mediatek/mtk_ppe.c 3fbe4d8c0e53 ("net: ethernet: mtk_eth_soc: ppe: add support for flow accounting") 924531326e2d ("net: ethernet: mtk_eth_soc: add missing ppe cache flush when deleting a flow") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/entry/common.c5
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/kcsan/Makefile2
-rw-r--r--kernel/sched/core.c3
-rw-r--r--kernel/sched/fair.c55
5 files changed, 51 insertions, 19 deletions
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 846add8394c4..be61332c66b5 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -21,7 +21,7 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
arch_enter_from_user_mode(regs);
lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CONTEXT_USER);
+ CT_WARN_ON(__ct_state() != CONTEXT_USER);
user_exit_irqoff();
instrumentation_begin();
@@ -192,13 +192,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
- unsigned long ti_work = read_thread_flags();
+ unsigned long ti_work;
lockdep_assert_irqs_disabled();
/* Flush pending rcuog wakeup before the last need_resched() check */
tick_nohz_user_enter_prepare();
+ ti_work = read_thread_flags();
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
diff --git a/kernel/fork.c b/kernel/fork.c
index d8cda4c6de6c..c0257cbee093 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -755,11 +755,6 @@ static void check_mm(struct mm_struct *mm)
for (i = 0; i < NR_MM_COUNTERS; i++) {
long x = percpu_counter_sum(&mm->rss_stat[i]);
- if (likely(!x))
- continue;
-
- /* Making sure this is not due to race with CPU offlining. */
- x = percpu_counter_sum_all(&mm->rss_stat[i]);
if (unlikely(x))
pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
mm, resident_page_types[i], x);
diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
index 8cf70f068d92..a45f3dfc8d14 100644
--- a/kernel/kcsan/Makefile
+++ b/kernel/kcsan/Makefile
@@ -16,6 +16,6 @@ obj-y := core.o debugfs.o report.o
KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
-CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer
+CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer
CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN)
obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 488655f2319f..0d18c3969f90 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2084,6 +2084,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
+ if (task_on_rq_migrating(p))
+ flags |= ENQUEUE_MIGRATED;
+
enqueue_task(rq, p, flags);
p->on_rq = TASK_ON_RQ_QUEUED;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a1b1f855b96..6986ea31c984 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}
+static inline bool entity_is_long_sleeper(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq;
+ u64 sleep_time;
+
+ if (se->exec_start == 0)
+ return false;
+
+ cfs_rq = cfs_rq_of(se);
+
+ sleep_time = rq_clock_task(rq_of(cfs_rq));
+
+ /* Happen while migrating because of clock task divergence */
+ if (sleep_time <= se->exec_start)
+ return false;
+
+ sleep_time -= se->exec_start;
+ if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
+ return true;
+
+ return false;
+}
+
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
u64 vruntime = cfs_rq->min_vruntime;
- u64 sleep_time;
/*
* The 'current' period is already promised to the current tasks,
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
/*
* Pull vruntime of the entity being placed to the base level of
- * cfs_rq, to prevent boosting it if placed backwards. If the entity
- * slept for a long time, don't even try to compare its vruntime with
- * the base as it may be too far off and the comparison may get
- * inversed due to s64 overflow.
- */
- sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
- if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
+ * cfs_rq, to prevent boosting it if placed backwards.
+ * However, min_vruntime can advance much faster than real time, with
+ * the extreme being when an entity with the minimal weight always runs
+ * on the cfs_rq. If the waking entity slept for a long time, its
+ * vruntime difference from min_vruntime may overflow s64 and their
+ * comparison may get inversed, so ignore the entity's original
+ * vruntime in that case.
+ * The maximal vruntime speedup is given by the ratio of normal to
+ * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
+ * When placing a migrated waking entity, its exec_start has been set
+ * from a different rq. In order to take into account a possible
+ * divergence between new and prev rq's clocks task because of irq and
+ * stolen time, we take an additional margin.
+ * So, cutting off on the sleep time of
+ * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
+ * should be safe.
+ */
+ if (entity_is_long_sleeper(se))
se->vruntime = vruntime;
else
se->vruntime = max_vruntime(se->vruntime, vruntime);
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
+ /* Entity has migrated, no longer consider this task hot */
+ if (flags & ENQUEUE_MIGRATED)
+ se->exec_start = 0;
check_schedstat_required();
update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
/* Tell new CPU we are migrated */
se->avg.last_update_time = 0;
- /* We have migrated, no longer consider this task hot */
- se->exec_start = 0;
-
update_scan_period(p, new_cpu);
}