diff options
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 63 |
1 files changed, 59 insertions, 4 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3abc651bc38a..6ffddca687fe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -877,6 +877,15 @@ static unsigned int task_scan_max(struct task_struct *p) return max(smin, smax); } +/* + * Once a preferred node is selected the scheduler balancer will prefer moving + * a task to that node for sysctl_numa_balancing_settle_count number of PTE + * scans. This will give the process the chance to accumulate more faults on + * the preferred node but still allow the scheduler to move the task again if + * the nodes CPUs are overloaded. + */ +unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3; + static void task_numa_placement(struct task_struct *p) { int seq, nid, max_nid = -1; @@ -888,6 +897,7 @@ static void task_numa_placement(struct task_struct *p) if (p->numa_scan_seq == seq) return; p->numa_scan_seq = seq; + p->numa_migrate_seq++; p->numa_scan_period_max = task_scan_max(p); /* Find the node with the highest number of faults */ @@ -907,8 +917,10 @@ static void task_numa_placement(struct task_struct *p) } /* Update the tasks preferred node if necessary */ - if (max_faults && max_nid != p->numa_preferred_nid) + if (max_faults && max_nid != p->numa_preferred_nid) { p->numa_preferred_nid = max_nid; + p->numa_migrate_seq = 0; + } } /* @@ -4071,6 +4083,38 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) return delta < (s64)sysctl_sched_migration_cost; } +#ifdef CONFIG_NUMA_BALANCING +/* Returns true if the destination node has incurred more faults */ +static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) +{ + int src_nid, dst_nid; + + if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || + !(env->sd->flags & SD_NUMA)) { + return false; + } + + src_nid = cpu_to_node(env->src_cpu); + dst_nid = cpu_to_node(env->dst_cpu); + + if (src_nid == dst_nid || + p->numa_migrate_seq >= sysctl_numa_balancing_settle_count) + return false; + + if (dst_nid == p->numa_preferred_nid || + p->numa_faults[dst_nid] > p->numa_faults[src_nid]) + return true; + + return false; +} +#else +static inline bool migrate_improves_locality(struct task_struct *p, + struct lb_env *env) +{ + return false; +} +#endif + /* * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? */ @@ -4128,11 +4172,22 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* * Aggressive migration if: - * 1) task is cache cold, or - * 2) too many balance attempts have failed. + * 1) destination numa is preferred + * 2) task is cache cold, or + * 3) too many balance attempts have failed. */ - tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd); + + if (migrate_improves_locality(p, env)) { +#ifdef CONFIG_SCHEDSTATS + if (tsk_cache_hot) { + schedstat_inc(env->sd, lb_hot_gained[env->idle]); + schedstat_inc(p, se.statistics.nr_forced_migrations); + } +#endif + return 1; + } + if (!tsk_cache_hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) { |