summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c6
-rw-r--r--kernel/auditfilter.c3
-rw-r--r--kernel/futex.c93
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/kgdb.c3
-rw-r--r--kernel/rcuclassic.c16
-rw-r--r--kernel/sched.c18
-rw-r--r--kernel/sched_rt.c3
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/timer.c10
10 files changed, 109 insertions, 57 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index e8692a5748c2..e092f1c0ce30 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -738,7 +738,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!audit_enabled && msg_type != AUDIT_USER_AVC)
return 0;
- err = audit_filter_user(&NETLINK_CB(skb), msg_type);
+ err = audit_filter_user(&NETLINK_CB(skb));
if (err == 1) {
err = 0;
if (msg_type == AUDIT_USER_TTY) {
@@ -779,7 +779,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
/* fallthrough */
case AUDIT_LIST:
- err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
+ err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid,
uid, seq, data, nlmsg_len(nlh),
loginuid, sessionid, sid);
break;
@@ -798,7 +798,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
/* fallthrough */
case AUDIT_LIST_RULES:
- err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
+ err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid,
uid, seq, data, nlmsg_len(nlh),
loginuid, sessionid, sid);
break;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 0e0bd27e6512..98c50cc671bb 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1544,6 +1544,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
* @data: payload data
* @datasz: size of payload data
* @loginuid: loginuid of sender
+ * @sessionid: sessionid for netlink audit message
* @sid: SE Linux Security ID of sender
*/
int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
@@ -1720,7 +1721,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
return 1;
}
-int audit_filter_user(struct netlink_skb_parms *cb, int type)
+int audit_filter_user(struct netlink_skb_parms *cb)
{
enum audit_state state = AUDIT_DISABLED;
struct audit_entry *e;
diff --git a/kernel/futex.c b/kernel/futex.c
index 449def8074fe..7d1136e97c14 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q)
* private futexes.
*/
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *newowner)
+ struct task_struct *newowner,
+ struct rw_semaphore *fshared)
{
u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
+ struct task_struct *oldowner = pi_state->owner;
u32 uval, curval, newval;
- int ret;
+ int ret, attempt = 0;
/* Owner died? */
+ if (!pi_state->owner)
+ newtid |= FUTEX_OWNER_DIED;
+
+ /*
+ * We are here either because we stole the rtmutex from the
+ * pending owner or we are the pending owner which failed to
+ * get the rtmutex. We have to replace the pending owner TID
+ * in the user space variable. This must be atomic as we have
+ * to preserve the owner died bit here.
+ *
+ * Note: We write the user space value _before_ changing the
+ * pi_state because we can fault here. Imagine swapped out
+ * pages or a fork, which was running right before we acquired
+ * mmap_sem, that marked all the anonymous memory readonly for
+ * cow.
+ *
+ * Modifying pi_state _before_ the user space value would
+ * leave the pi_state in an inconsistent state when we fault
+ * here, because we need to drop the hash bucket lock to
+ * handle the fault. This might be observed in the PID check
+ * in lookup_pi_state.
+ */
+retry:
+ if (get_futex_value_locked(&uval, uaddr))
+ goto handle_fault;
+
+ while (1) {
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
+
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+ if (curval == -EFAULT)
+ goto handle_fault;
+ if (curval == uval)
+ break;
+ uval = curval;
+ }
+
+ /*
+ * We fixed up user space. Now we need to fix the pi_state
+ * itself.
+ */
if (pi_state->owner != NULL) {
spin_lock_irq(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
spin_unlock_irq(&pi_state->owner->pi_lock);
- } else
- newtid |= FUTEX_OWNER_DIED;
+ }
pi_state->owner = newowner;
@@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &newowner->pi_state_list);
spin_unlock_irq(&newowner->pi_lock);
+ return 0;
/*
- * We own it, so we have to replace the pending owner
- * TID. This must be atomic as we have preserve the
- * owner died bit here.
+ * To handle the page fault we need to drop the hash bucket
+ * lock here. That gives the other task (either the pending
+ * owner itself or the task which stole the rtmutex) the
+ * chance to try the fixup of the pi_state. So once we are
+ * back from handling the fault we need to check the pi_state
+ * after reacquiring the hash bucket lock and before trying to
+ * do another fixup. When the fixup has been done already we
+ * simply return.
*/
- ret = get_futex_value_locked(&uval, uaddr);
+handle_fault:
+ spin_unlock(q->lock_ptr);
- while (!ret) {
- newval = (uval & FUTEX_OWNER_DIED) | newtid;
+ ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+ spin_lock(q->lock_ptr);
- if (curval == -EFAULT)
- ret = -EFAULT;
- if (curval == uval)
- break;
- uval = curval;
- }
- return ret;
+ /*
+ * Check if someone else fixed it for us:
+ */
+ if (pi_state->owner != oldowner)
+ return 0;
+
+ if (ret)
+ return ret;
+
+ goto retry;
}
/*
@@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
* that case:
*/
if (q.pi_state->owner != curr)
- ret = fixup_pi_state_owner(uaddr, &q, curr);
+ ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
} else {
/*
* Catch the rare case, where the lock was released
@@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
int res;
owner = rt_mutex_owner(&q.pi_state->pi_mutex);
- res = fixup_pi_state_owner(uaddr, &q, owner);
+ res = fixup_pi_state_owner(uaddr, &q, owner,
+ fshared);
/* propagate -EFAULT, if the fixup failed */
if (res)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 421be5fe5cc7..543d9ca9b4f4 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1078,7 +1078,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
}
EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
-#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
+#ifdef CONFIG_NO_HZ
/**
* hrtimer_get_next_event - get the time until next expiry event
*
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 79e3c90113c2..3ec23c3ec97f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1499,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs)
return 1;
}
-void kgdb_console_write(struct console *co, const char *s, unsigned count)
+static void kgdb_console_write(struct console *co, const char *s,
+ unsigned count)
{
unsigned long flags;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index f4ffbd0f306f..a38895a5b8e2 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -89,8 +89,22 @@ static void force_quiescent_state(struct rcu_data *rdp,
/*
* Don't send IPI to itself. With irqs disabled,
* rdp->cpu is the current cpu.
+ *
+ * cpu_online_map is updated by the _cpu_down()
+ * using stop_machine_run(). Since we're in irqs disabled
+ * section, stop_machine_run() is not exectuting, hence
+ * the cpu_online_map is stable.
+ *
+ * However, a cpu might have been offlined _just_ before
+ * we disabled irqs while entering here.
+ * And rcu subsystem might not yet have handled the CPU_DEAD
+ * notification, leading to the offlined cpu's bit
+ * being set in the rcp->cpumask.
+ *
+ * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
+ * sending smp_reschedule() to an offlined CPU.
*/
- cpumask = rcp->cpumask;
+ cpus_and(cpumask, rcp->cpumask, cpu_online_map);
cpu_clear(rdp->cpu, cpumask);
for_each_cpu_mask(cpu, cpumask)
smp_send_reschedule(cpu);
diff --git a/kernel/sched.c b/kernel/sched.c
index b048ad8a11af..94ead43eda62 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
signal_pending(current)) ||
(state == TASK_KILLABLE &&
fatal_signal_pending(current))) {
- __remove_wait_queue(&x->wait, &wait);
- return -ERESTARTSYS;
+ timeout = -ERESTARTSYS;
+ break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock);
timeout = schedule_timeout(timeout);
spin_lock_irq(&x->wait.lock);
- if (!timeout) {
- __remove_wait_queue(&x->wait, &wait);
- return timeout;
- }
- } while (!x->done);
+ } while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ return timeout;
}
x->done--;
- return timeout;
+ return timeout ?: 1;
}
static long __sched
@@ -5889,6 +5887,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
next = pick_next_task(rq, rq->curr);
if (!next)
break;
+ next->sched_class->put_prev_task(rq, next);
migrate_dead(dead_cpu, next);
}
@@ -8503,6 +8502,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
rt_period = (u64)rt_period_us * NSEC_PER_USEC;
rt_runtime = tg->rt_bandwidth.rt_runtime;
+ if (rt_period == 0)
+ return -EINVAL;
+
return tg_set_bandwidth(tg, rt_period, rt_runtime);
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..0f3c19197fa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
spin_unlock(&rt_rq->rt_runtime_lock);
- }
+ } else if (rt_rq->rt_nr_running)
+ idle = 0;
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..f6d2e57b99a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -132,8 +132,6 @@ extern int sysctl_userprocess_debug;
extern int spin_retry;
#endif
-extern int sysctl_hz_timer;
-
#ifdef CONFIG_BSD_PROCESS_ACCT
extern int acct_parm[];
#endif
@@ -563,16 +561,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
-#ifdef CONFIG_NO_IDLE_HZ
- {
- .ctl_name = KERN_HZ_TIMER,
- .procname = "hz_timer",
- .data = &sysctl_hz_timer,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
-#endif
{
.ctl_name = KERN_S390_USER_DEBUG_LOGGING,
.procname = "userprocess_debug",
diff --git a/kernel/timer.c b/kernel/timer.c
index ceacc6626572..ef3fa6906e8f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -812,7 +812,7 @@ static inline void __run_timers(struct tvec_base *base)
spin_unlock_irq(&base->lock);
}
-#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
+#ifdef CONFIG_NO_HZ
/*
* Find out when the next timer event is due to happen. This
* is used on S/390 to stop all activity when a cpus is idle.
@@ -947,14 +947,6 @@ unsigned long get_next_timer_interrupt(unsigned long now)
return cmp_next_hrtimer_event(now, expires);
}
-
-#ifdef CONFIG_NO_IDLE_HZ
-unsigned long next_timer_interrupt(void)
-{
- return get_next_timer_interrupt(jiffies);
-}
-#endif
-
#endif
#ifndef CONFIG_VIRT_CPU_ACCOUNTING