From b7522056ec1a62a5f3246627e12ef48e6274c21c Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Wed, 13 Jan 2016 18:39:58 +0300
Subject: ftrace: Remove unused nr_trampolines var

It's not needed & not used since introducing old_hash: commit fef5aeeee9e371
("ftrace: Replace tramp_hash with old_*_hash to save space").

Link: http://lkml.kernel.org/r/1452699598-27610-1-git-send-email-0x7f454c46@gmail.com

Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 60048c50404e..65460ad93c7b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -164,7 +164,6 @@ struct ftrace_ops {
 	ftrace_func_t			saved_func;
 	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-	int				nr_trampolines;
 	struct ftrace_ops_hash		local_hash;
 	struct ftrace_ops_hash		*func_hash;
 	struct ftrace_ops_hash		old_hash;
-- 
cgit v1.2.3


From 203cbf77de59fc8f13502dcfd11350c6d4a5c95f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 16:54:46 +0000
Subject: hrtimer: Handle remaining time proper for TIME_LOW_RES

If CONFIG_TIME_LOW_RES is enabled we add a jiffie to the relative timeout to
prevent short sleeps, but we do not account for that in interfaces which
retrieve the remaining time.

Helge observed that timerfd can return a remaining time larger than the
relative timeout. That's not expected and breaks userland test programs.

Store the information that the timer was armed relative and provide functions
to adjust the remaining time. To avoid bloating the hrtimer struct make state
a u8, which as a bonus results in better code on x86 at least.

Reported-and-tested-by: Helge Deller <deller@gmx.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: dhowells@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20160114164159.273328486@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 76dd4f0da5ca..2ead22dd74a0 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -87,7 +87,8 @@ enum hrtimer_restart {
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
- * @start_pid: timer statistics field to store the pid of the task which
+ * @is_rel:	Set if the timer was armed relative
+ * @start_pid:  timer statistics field to store the pid of the task which
  *		started the timer
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
@@ -101,7 +102,8 @@ struct hrtimer {
 	ktime_t				_softexpires;
 	enum hrtimer_restart		(*function)(struct hrtimer *);
 	struct hrtimer_clock_base	*base;
-	unsigned long			state;
+	u8				state;
+	u8				is_rel;
 #ifdef CONFIG_TIMER_STATS
 	int				start_pid;
 	void				*start_site;
@@ -321,6 +323,27 @@ static inline void clock_was_set_delayed(void) { }
 
 #endif
 
+static inline ktime_t
+__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
+{
+	ktime_t rem = ktime_sub(timer->node.expires, now);
+
+	/*
+	 * Adjust relative timers for the extra we added in
+	 * hrtimer_start_range_ns() to prevent short timeouts.
+	 */
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
+		rem.tv64 -= hrtimer_resolution;
+	return rem;
+}
+
+static inline ktime_t
+hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
+{
+	return __hrtimer_expires_remaining_adjusted(timer,
+						    timer->base->get_time());
+}
+
 extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
@@ -390,7 +413,12 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 }
 
 /* Query timers: */
-extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
+extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+
+static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+{
+	return __hrtimer_get_remaining(timer, false);
+}
 
 extern u64 hrtimer_get_next_event(void);
 
-- 
cgit v1.2.3


From b4ace4f1ae07691b4f6ea9f3e92efbec083df058 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Tue, 19 Jan 2016 14:27:08 -0500
Subject: soreuseport: fix NULL ptr dereference SO_REUSEPORT after bind

Marc Dionne discovered a NULL pointer dereference when setting
SO_REUSEPORT on a socket after it is bound.
This patch removes the assumption that at least one socket in the
reuseport group is bound with the SO_REUSEPORT option before other
bind calls occur.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Reported-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: Craig Gallek <kraig@google.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock_reuseport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 7dda3d7adba8..aecd30308d50 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,7 +16,7 @@ struct sock_reuseport {
 };
 
 extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
-- 
cgit v1.2.3


From c240906d36653944d5c049df7ce667a7e8bea6ac Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 19 Jan 2016 10:46:58 +0000
Subject: drm/atomic-helper: Export framebuffer_changed()

The Rockchip driver cannot use drm_atomic_helper_wait_for_vblanks()
because it has hardware counters for neither vblanks nor scanlines.

In order to simplify re-implementing the functionality for this driver,
export the framebuffer_changed() helper so it can be reused.

Signed-off-by: John Keeping <john@metanate.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 include/drm/drm_atomic_helper.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 89d008dc08e2..fe5efada9d68 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 			     struct drm_atomic_state *state,
 			     bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 					struct drm_atomic_state *old_state);
 
-- 
cgit v1.2.3


From b16c29191dc89bd877af99a7b04ce4866728a3e0 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 18 Jan 2016 19:23:51 -0500
Subject: netfilter: nf_conntrack: use safer way to lock all buckets

When we need to lock all buckets in the connection hashtable we'd attempt to
lock 1024 spinlocks, which is way more preemption levels than supported by
the kernel. Furthermore, this behavior was hidden by checking if lockdep is
enabled, and if it was - use only 8 buckets(!).

Fix this by using a global lock and synchronize all buckets on it when we
need to lock them all. This is pretty heavyweight, but is only done when we
need to resize the hashtable, and that doesn't happen often enough (or at all).

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 788ef58a66b9..62e17d1319ff 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -79,12 +79,10 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *proto);
 
-#ifdef CONFIG_LOCKDEP
-# define CONNTRACK_LOCKS 8
-#else
-# define CONNTRACK_LOCKS 1024
-#endif
+#define CONNTRACK_LOCKS 1024
+
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+void nf_conntrack_lock(spinlock_t *lock);
 
 extern spinlock_t nf_conntrack_expect_lock;
 
-- 
cgit v1.2.3


From 386744425e35e04984c6e741c7750fd6eef1a9df Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 1 Jul 2015 14:17:58 +0200
Subject: swiotlb: Make linux/swiotlb.h standalone includible

This header file uses the enum dma_data_direction and struct page types
without explicitly including the corresponding header files. This makes
it rely on the includer to have included the proper headers before.

To fix this, include linux/dma-direction.h and forward-declare struct
page. The swiotlb_free() function is also annotated __init, therefore
requires linux/init.h to be included as well.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e7a018eaf3a2..017fced60242 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
-- 
cgit v1.2.3


From ce87fc6ce3f9f4488546187e3757cf666d9d4a2a Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@kernel.org>
Date: Wed, 20 Jan 2016 17:59:49 -0800
Subject: gro: Make GRO aware of lightweight tunnels.

GRO is currently not aware of tunnel metadata generated by lightweight
tunnels and stored in the dst. This leads to two possible problems:
 * Incorrectly merging two frames that have different metadata.
 * Leaking of allocated metadata from merged frames.

This avoids those problems by comparing the tunnel information before
merging, similar to how we handle other metadata (such as vlan tags),
and releasing any state when we are done.

Reported-by: John <john.phillips5@hpe.com>
Fixes: 2e15ea39 ("ip_gre: Add support to collect tunnel metadata.")
Signed-off-by: Jesse Gross <jesse@kernel.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_metadata.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 6816f0fa5693..30a56ab2ccfb 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
 	return dst && !(dst->flags & DST_METADATA);
 }
 
+static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
+				       const struct sk_buff *skb_b)
+{
+	const struct metadata_dst *a, *b;
+
+	if (!(skb_a->_skb_refdst | skb_b->_skb_refdst))
+		return 0;
+
+	a = (const struct metadata_dst *) skb_dst(skb_a);
+	b = (const struct metadata_dst *) skb_dst(skb_b);
+
+	if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len)
+		return 1;
+
+	return memcmp(&a->u.tun_info, &b->u.tun_info,
+		      sizeof(a->u.tun_info) + a->u.tun_info.options_len);
+}
+
 struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
 struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
 
-- 
cgit v1.2.3


From fae3fde65138b6071b1b0e0b567d4058a8b6a88c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 11 Jan 2016 15:00:50 +0100
Subject: perf: Collapse and fix event_function_call() users

There is one common bug left in all the event_function_call() users,
between loading ctx->task and getting to the remote_function(),
ctx->task can already have been changed.

Therefore we need to double check and retry if ctx->task != current.

Insert another trampoline specific to event_function_call() that
checks for this and further validates state. This also allows getting
rid of the active/inactive functions.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a48f16a..6612732d8fd0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1044,7 +1044,7 @@ extern void perf_swevent_put_recursion_context(int rctx);
 extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
-extern int __perf_event_disable(void *info);
+extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
-- 
cgit v1.2.3


From 4877be9019baaf1432f9117bff4873e4ad518d91 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 21 Jan 2016 15:56:28 -0500
Subject: net: sock: remove dead cgroup methods from struct proto

The cgroup methods are no longer used after baac50bbc3cd ("net:
tcp_memcontrol: simplify linkage between socket and page counter").
The hunk to delete them was included in the original patch but must
have gotten lost during conflict resolution on the way upstream.

Fixes: baac50bbc3cd ("net: tcp_memcontrol: simplify linkage between socket and page counter")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b9e7b3d863a0..f5ea148853e2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1035,18 +1035,6 @@ struct proto {
 	struct list_head	node;
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
-#endif
-#ifdef CONFIG_MEMCG_KMEM
-	/*
-	 * cgroup specific init/deinit functions. Called once for all
-	 * protocols that implement it, from cgroups populate function.
-	 * This function has to setup any files the protocol want to
-	 * appear in the kmem cgroup filesystem.
-	 */
-	int			(*init_cgroup)(struct mem_cgroup *memcg,
-					       struct cgroup_subsys *ss);
-	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
-	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
 	int			(*diag_destroy)(struct sock *sk, int err);
 };
-- 
cgit v1.2.3


From 6a84f57241e1fb9fb6772256f538d1073359a32d Mon Sep 17 00:00:00 2001
From: Gayatri Kammela <gayatri.kammela@intel.com>
Date: Thu, 21 Jan 2016 14:02:39 -0800
Subject: raid6/algos.c : bug fix : Add the missing definitions to the pq.h
 file

Adding these pr_info and pr_err definitions so as to allow code to be
compiled successfully for testing in userspace, since the printk has
been replaced by pr_info and pr_err in algos.c

Absence of these definitions result in the compilation errors
such as ' undefined reference to `pr_info' ' ' undefined reference to
`pr_err' '

Cc: NeilBrown <neilb@suse.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 include/linux/raid/pq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a7a06d1dcf9c..a0118d5929a9 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -152,6 +152,8 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 
 # define jiffies	raid6_jiffies()
 # define printk 	printf
+# define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+# define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
 # define GFP_KERNEL	0
 # define __get_free_pages(x, y)	((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
 						     PROT_READ|PROT_WRITE,   \
-- 
cgit v1.2.3


From e93ad19d05648397ef3bcb838d26aec06c245dc0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 19 Jan 2016 12:18:41 -0500
Subject: cpuset: make mm migration asynchronous

If "cpuset.memory_migrate" is set, when a process is moved from one
cpuset to another with a different memory node mask, pages in used by
the process are migrated to the new set of nodes.  This was performed
synchronously in the ->attach() callback, which is synchronized
against process management.  Recently, the synchronization was changed
from per-process rwsem to global percpu rwsem for simplicity and
optimization.

Combined with the synchronous mm migration, this led to deadlocks
because mm migration could schedule a work item which may in turn try
to create a new worker blocking on the process management lock held
from cgroup process migration path.

This heavy an operation shouldn't be performed synchronously from that
deep inside cgroup migration in the first place.  This patch punts the
actual migration to an ordered workqueue and updates cgroup process
migration and cpuset config update paths to flush the workqueue after
all locks are released.  This way, the operations still seem
synchronous to userland without entangling mm migration with process
management synchronization.  CPU hotplug can also invoke mm migration
but there's no reason for it to wait for mm migrations and thus
doesn't synchronize against their completions.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: stable@vger.kernel.org # v4.4+
---
 include/linux/cpuset.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 85a868ccb493..fea160ee5803 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,6 +137,8 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 	task_unlock(current);
 }
 
+extern void cpuset_post_attach_flush(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -243,6 +245,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
 	return false;
 }
 
+static inline void cpuset_post_attach_flush(void)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
-- 
cgit v1.2.3


From aa226ff4a1ce79f229c6b7a4c0a14e17fececd01 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Jan 2016 15:31:11 -0500
Subject: cgroup: make sure a parent css isn't offlined before its children

There are three subsystem callbacks in css shutdown path -
css_offline(), css_released() and css_free().  Except for
css_released(), cgroup core didn't guarantee the order of invocation.
css_offline() or css_free() could be called on a parent css before its
children.  This behavior is unexpected and led to bugs in cpu and
memory controller.

This patch updates offline path so that a parent css is never offlined
before its children.  Each css keeps online_cnt which reaches zero iff
itself and all its children are offline and offline_css() is invoked
only after online_cnt reaches zero.

This fixes the memory controller bug and allows the fix for cpu
controller.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reported-by: Brian Christiansen <brian.o.christiansen@gmail.com>
Link: http://lkml.kernel.org/g/5698A023.9070703@de.ibm.com
Link: http://lkml.kernel.org/g/CAKB58ikDkzc8REt31WBkD99+hxNzjK4+FBmhkgS+NVrC9vjMSg@mail.gmail.com
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
---
 include/linux/cgroup-defs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 7f540f7f588d..789471dba6fb 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -127,6 +127,12 @@ struct cgroup_subsys_state {
 	 */
 	u64 serial_nr;
 
+	/*
+	 * Incremented by online self and children.  Used to guarantee that
+	 * parents are not offlined before their children.
+	 */
+	atomic_t online_cnt;
+
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
-- 
cgit v1.2.3


From facc432faa59414bd7c60c307ff1645154a66c98 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 22 Jan 2016 11:43:44 +0100
Subject: net: simplify napi_synchronize() to avoid warnings

The napi_synchronize() function is defined twice: The definition
for SMP builds waits for other CPUs to be done, while the uniprocessor
variant just contains a barrier and ignores its argument.

In the mvneta driver, this leads to a warning about an unused variable
when we lookup the NAPI struct of another CPU and then don't use it:

ethernet/marvell/mvneta.c: In function 'mvneta_percpu_notifier':
ethernet/marvell/mvneta.c:2910:30: error: unused variable 'other_port' [-Werror=unused-variable]

There are no other CPUs on a UP build, so that code never runs, but
gcc does not know this.

The nicest solution seems to be to turn the napi_synchronize() helper
into an inline function for the UP case as well, as that leads gcc to
not complain about the argument being unused. Once we do that, we can
also combine the two cases into a single function definition and use
if(IS_ENABLED()) rather than #ifdef to make it look a bit nicer.

The warning first came up in linux-4.4, but I failed to catch it
earlier.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: f86428854480 ("net: mvneta: Statically assign queues to CPUs")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5ac140dcb789..289c2314d766 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -512,7 +512,6 @@ static inline void napi_enable(struct napi_struct *n)
 	clear_bit(NAPI_STATE_NPSVC, &n->state);
 }
 
-#ifdef CONFIG_SMP
 /**
  *	napi_synchronize - wait until NAPI is not running
  *	@n: napi context
@@ -523,12 +522,12 @@ static inline void napi_enable(struct napi_struct *n)
  */
 static inline void napi_synchronize(const struct napi_struct *n)
 {
-	while (test_bit(NAPI_STATE_SCHED, &n->state))
-		msleep(1);
+	if (IS_ENABLED(CONFIG_SMP))
+		while (test_bit(NAPI_STATE_SCHED, &n->state))
+			msleep(1);
+	else
+		barrier();
 }
-#else
-# define napi_synchronize(n)	barrier()
-#endif
 
 enum netdev_queue_state_t {
 	__QUEUE_STATE_DRV_XOFF,
-- 
cgit v1.2.3


From 71f50c6d9a2276f3ec85384bffe2aee1962f4669 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 22 Jan 2016 01:33:51 +0900
Subject: of: drop symbols declared by _OF_DECLARE() from modules

The users of this macro (OF_EARLYCON_DECLARE, CLK_OF_DECLARE,
IRQCHIP_DECLARE, etc.) are only parsed in the early boot stage.
Such symbols contained in modules are never used.

This commit fixes the link error introduced by commit b8d20e06eaad
("serial: 8250_uniphier: add earlycon support"); the combination
of CONFIG_SERIAL_8250_UNIPHIER=m and CONFIG_SERIAL_8250_CONSOLE=y
fails to link:

ERROR: "early_serial8250_setup" [drivers/tty/serial/8250/8250_uniphier.ko] undefined!

Fixes: b8d20e06eaad ("serial: 8250_uniphier: add earlycon support")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index dd10626a615f..dc6e39696b64 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np)
 	return num;
 }
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
 #define _OF_DECLARE(table, name, compat, fn, fn_type)			\
 	static const struct of_device_id __of_table_##name		\
 		__used __section(__##table##_of_table)			\
-- 
cgit v1.2.3


From 7fd13615992ae0fc132c9abb24511be43f3b5d9d Mon Sep 17 00:00:00 2001
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Date: Thu, 21 Jan 2016 09:48:14 -0200
Subject: tracing/dma-buf/fence: Fix timeline str value on
 fence_annotate_wait_on

timeline was wrongly assigned with ->get_driver_name().

Link: http://lkml.kernel.org/r/1453376895-30747-1-git-send-email-gustavo@padovan.org

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/fence.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
index 98feb1b82896..d6dfa05ba322 100644
--- a/include/trace/events/fence.h
+++ b/include/trace/events/fence.h
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
 
 	TP_STRUCT__entry(
 		__string(driver, fence->ops->get_driver_name(fence))
-		__string(timeline, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
 		__field(unsigned int, context)
 		__field(unsigned int, seqno)
 
-- 
cgit v1.2.3


From fb3296335500aaff61333df8eabbccf28761c79d Mon Sep 17 00:00:00 2001
From: Danesh Petigara <dpetigara@broadcom.com>
Date: Mon, 11 Jan 2016 13:22:26 -0800
Subject: drivers: ata: wake port before DMA stop for ALPM

The AHCI driver code stops and starts port DMA engines at will
without considering the power state of the particular port. The
AHCI specification isn't very clear on how to handle this scenario,
leaving implementation open to interpretation.

Broadcom's STB SATA host controller is unable to handle port DMA
controller restarts when the port in question is in low power mode.
When a port enters partial or slumber mode, its PHY is powered down.
When a controller restart is requested, the controller's internal
state machine expects the PHY to be brought back up by software which
never happens in this case, resulting in failures.

To avoid this situation, logic is added to manually wake up the port
just before its DMA engine is stopped, if the port happens to be in
a low power state. HBA initiated power management ensures that the port
eventually returns to its configured low power state, when the link is
idle (as per the conditions listed in the spec). A new host flag is also
added to ensure this logic is only exercised for hosts with the above
limitation.

tj: Formatting changes.

Signed-off-by: Danesh Petigara <dpetigara@broadcom.com>
Reviewed-by: Markus Mayer <mmayer@broadcom.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 851821bfd553..bec2abbd7ab2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -526,6 +526,7 @@ enum ata_lpm_policy {
 enum ata_lpm_hints {
 	ATA_LPM_EMPTY		= (1 << 0), /* port empty/probing */
 	ATA_LPM_HIPM		= (1 << 1), /* may use HIPM */
+	ATA_LPM_WAKE_ONLY	= (1 << 2), /* only wake up link */
 };
 
 /* forward declarations */
-- 
cgit v1.2.3


From 530cbe100ef7587aa5b5ac3a4b670cda4d50e598 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 26 Jan 2016 13:52:25 +0000
Subject: irqdomain: Allow domain lookup with DOMAIN_BUS_WIRED token

Let's take the (outlandish) example of an interrupt controller
capable of handling both wired interrupts and PCI MSIs.

With the current code, the PCI MSI domain is going to be tagged
with DOMAIN_BUS_PCI_MSI, and the wired domain with DOMAIN_BUS_ANY.

Things get hairy when we start looking up the domain for a wired
interrupt (typically when creating it based on some firmware
information - DT or ACPI).

In irq_create_fwspec_mapping(), we perform the lookup using
DOMAIN_BUS_ANY, which is actually used as a wildcard. This gives
us one chance out of two to end up with the wrong domain, and
we try to configure a wired interrupt with the MSI domain.
Everything grinds to a halt pretty quickly.

What we really need to do is to start looking for a domain that
would uniquely identify a wired interrupt domain, and only use
DOMAIN_BUS_ANY as a fallback.

In order to solve this, let's introduce a new DOMAIN_BUS_WIRED
token, which is going to be used exactly as described above.
Of course, this depends on the irqchip to setup the domain
bus_token, and nobody had to implement this so far.

Only so far.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Link: http://lkml.kernel.org/r/1453816347-32720-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index f64622ad02c1..04579d9fbce4 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -70,6 +70,7 @@ struct irq_fwspec {
  */
 enum irq_domain_bus_token {
 	DOMAIN_BUS_ANY		= 0,
+	DOMAIN_BUS_WIRED,
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
-- 
cgit v1.2.3


From 602eb48966d7b7f7e64dca8d9ea2842d83bfae73 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 24 Jan 2016 17:36:04 +0000
Subject: drm/etnaviv: add further minor features and varyings count

Export further minor feature bitmasks and the varyings count from
the GPU specifications registers to userspace.

Acked-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 include/uapi/drm/etnaviv_drm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 4cc989ad6851..f95e1c43c3fb 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
-- 
cgit v1.2.3


From 0bfd464d3fdd5bb322f9cace4cc47f1796545cf7 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sat, 9 Jan 2016 21:13:44 -0800
Subject: tty: Wait interruptibly for tty lock on reopen

Allow a signal to interrupt the wait for a tty reopen; eg., if
the tty has starting final close and is waiting for the device to
drain.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Cc: stable <stable@vger.kernel.org> # 4.4
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 2fd8708ea888..d9fb4b043f56 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -649,6 +649,7 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
 extern void __lockfunc tty_lock(struct tty_struct *tty);
+extern int  tty_lock_interruptible(struct tty_struct *tty);
 extern void __lockfunc tty_unlock(struct tty_struct *tty);
 extern void __lockfunc tty_lock_slave(struct tty_struct *tty);
 extern void __lockfunc tty_unlock_slave(struct tty_struct *tty);
-- 
cgit v1.2.3


From b3c6de492b9ea30a8dcc535d4dc2eaaf0bb3f116 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Thu, 21 Jan 2016 16:47:29 +0100
Subject: cleancache: constify cleancache_ops structure

The cleancache_ops structure is never modified, so declare it as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/cleancache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index bda5ec0b4b4d..cb3e14234502 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -37,7 +37,7 @@ struct cleancache_ops {
 	void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
-- 
cgit v1.2.3


From a39bb9a0557a3fc860c3c9d67a7326b0d2f1bd66 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen.5i5j@gmail.com>
Date: Thu, 7 Jan 2016 05:06:13 +0800
Subject: include/linux/cleancache.h: Clean up code

Let cleancache_fs_enabled() call cleancache_fs_enabled_mapping()
directly.

Remove redundant variable ret in cleancache_get_page().

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/cleancache.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index cb3e14234502..fccf7f44139d 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -48,14 +48,14 @@ extern void __cleancache_invalidate_fs(struct super_block *);
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-	return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
 	return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+	return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@ static inline void cleancache_init_shared_fs(struct super_block *sb)
 
 static inline int cleancache_get_page(struct page *page)
 {
-	int ret = -1;
-
 	if (cleancache_enabled && cleancache_fs_enabled(page))
-		ret = __cleancache_get_page(page);
-	return ret;
+		return __cleancache_get_page(page);
+	return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
-- 
cgit v1.2.3


From 1eed677933b816978abc4e3e18ecae5f254cb9be Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 22 Jan 2016 01:49:07 +0800
Subject: sctp: fix the transport dead race check by using atomic_add_unless on
 refcnt

Now when __sctp_lookup_association is running in BH, it will try to
check if t->dead is set, but meanwhile other CPUs may be freeing this
transport and this assoc and if it happens that
__sctp_lookup_association checked t->dead a bit too early, it may think
that the association is still good while it was already freed.

So we fix this race by using atomic_add_unless in sctp_transport_hold.
After we get one transport from hashtable, we will hold it only when
this transport's refcnt is not 0, so that we can make sure t->asoc
cannot be freed before we hold the asoc again.

Note that sctp association is not freed using RCU so we can't use
atomic_add_unless() with it as it may just be too late for that either.

Fixes: 4f0087812648 ("sctp: apply rhashtable api to send/recv path")
Reported-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 20e72129be1c..344da04e2e30 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -955,7 +955,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
-void sctp_transport_hold(struct sctp_transport *);
+int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
 void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
-- 
cgit v1.2.3


From 47faa1e4c50ec26e6e75dcd1ce53f064bd45f729 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 22 Jan 2016 01:49:09 +0800
Subject: sctp: remove the dead field of sctp_transport

After we use refcnt to check if transport is alive, the dead can be
removed from sctp_transport.

The traversal of transport_addr_list in procfs dump is using
list_for_each_entry_rcu, no need to check if it has been freed.

sctp_generate_t3_rtx_event and sctp_generate_heartbeat_event is
protected by sock lock, it's not necessary to check dead, either.
also, the timers are cancelled when sctp_transport_free() is
called, that it doesn't wait for refcnt to reach 0 to cancel them.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 344da04e2e30..205630bb5010 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -756,7 +756,6 @@ struct sctp_transport {
 
 	/* Reference counting. */
 	atomic_t refcnt;
-	__u32	 dead:1,
 		/* RTO-Pending : A flag used to track if one of the DATA
 		 *		chunks sent to this address is currently being
 		 *		used to compute a RTT. If this flag is 0,
@@ -766,7 +765,7 @@ struct sctp_transport {
 		 *		calculation completes (i.e. the DATA chunk
 		 *		is SACK'd) clear this flag.
 		 */
-		 rto_pending:1,
+	__u32	rto_pending:1,
 
 		/*
 		 * hb_sent : a flag that signals that we have a pending
-- 
cgit v1.2.3


From e03e7ee34fdd1c3ef494949a75cb8c61c7265fa9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Date: Mon, 25 Jan 2016 20:59:49 -0800
Subject: perf/bpf: Convert perf_event_array to use struct file

Robustify refcounting.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/20160126045947.GA40151@ast-mbp.thefacebook.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6612732d8fd0..4f90434b8d64 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -729,7 +729,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1070,7 +1070,7 @@ static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
-static inline struct perf_event *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From c6e5b73242d2d9172ea880483bc4ba7ffca0cfb2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 15 Jan 2016 16:07:41 +0200
Subject: perf: Synchronously clean up child events

The orphan cleanup workqueue doesn't always catch orphans, for example,
if they never schedule after they are orphaned. IOW, the event leak is
still very real. It also wouldn't work for kernel counters.

Doing it synchonously is a little hairy due to lock inversion issues,
but is made to work.

Patch based on work by Alexander Shishkin.

Suggested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4f90434b8d64..b35a61a481fa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -634,9 +634,6 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
-
-	struct delayed_work		orphans_remove;
-	bool				orphans_remove_sched;
 };
 
 /*
-- 
cgit v1.2.3


From 114f9f1e038eb23935c20fb54f49f07caaa0546d Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 26 Jan 2016 17:19:09 -0500
Subject: Bluetooth: L2CAP: Introduce proper defines for PSM ranges

Having proper defines makes the code a bit readable, it also avoids
duplicating hard-coded values since these are also needed when
auto-allocating PSM values (in a subsequent patch).

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 52899291f401..5ee3c689c863 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -252,6 +252,12 @@ struct l2cap_conn_rsp {
 #define L2CAP_PSM_3DSP		0x0021
 #define L2CAP_PSM_IPSP		0x0023 /* 6LoWPAN */
 
+#define L2CAP_PSM_DYN_START	0x1001
+#define L2CAP_PSM_DYN_END	0xffff
+#define L2CAP_PSM_AUTO_END	0x10ff
+#define L2CAP_PSM_LE_DYN_START  0x0080
+#define L2CAP_PSM_LE_DYN_END	0x00ff
+
 /* channel identifier */
 #define L2CAP_CID_SIGNALING	0x0001
 #define L2CAP_CID_CONN_LESS	0x0002
-- 
cgit v1.2.3


From 0d9bacb6c8265e7aa75ac28ae9b5d7748065942f Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm+renesas@opensource.se>
Date: Tue, 19 Jan 2016 14:28:48 +0900
Subject: iommu: Update struct iommu_ops comments

Update the comments around struct iommu_ops to match
current state and fix a few typos while at it.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f28dff313b07..a5c539fa5d2b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -133,8 +133,9 @@ struct iommu_dm_region {
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@ struct iommu_dm_region {
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@ struct iommu_ops {
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-	/* Set the numer of window per domain */
+	/* Set the number of windows per domain */
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-	/* Get the numer of window per domain */
+	/* Get the number of windows per domain */
 	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
-- 
cgit v1.2.3


From 23d11a58a9a60dcb52c8fc6494efce908b24c295 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 29 Jan 2016 05:59:46 -0500
Subject: workqueue: skip flush dependency checks for legacy workqueues

fca839c00a12 ("workqueue: warn if memory reclaim tries to flush
!WQ_MEM_RECLAIM workqueue") implemented flush dependency warning which
triggers if a PF_MEMALLOC task or WQ_MEM_RECLAIM workqueue tries to
flush a !WQ_MEM_RECLAIM workquee.

This assumes that workqueues marked with WQ_MEM_RECLAIM sit in memory
reclaim path and making it depend on something which may need more
memory to make forward progress can lead to deadlocks.  Unfortunately,
workqueues created with the legacy create*_workqueue() interface
always have WQ_MEM_RECLAIM regardless of whether they are depended
upon memory reclaim or not.  These spurious WQ_MEM_RECLAIM markings
cause spurious triggering of the flush dependency checks.

  WARNING: CPU: 0 PID: 6 at kernel/workqueue.c:2361 check_flush_dependency+0x138/0x144()
  workqueue: WQ_MEM_RECLAIM deferwq:deferred_probe_work_func is flushing !WQ_MEM_RECLAIM events:lru_add_drain_per_cpu
  ...
  Workqueue: deferwq deferred_probe_work_func
  [<c0017acc>] (unwind_backtrace) from [<c0013134>] (show_stack+0x10/0x14)
  [<c0013134>] (show_stack) from [<c0245f18>] (dump_stack+0x94/0xd4)
  [<c0245f18>] (dump_stack) from [<c0026f9c>] (warn_slowpath_common+0x80/0xb0)
  [<c0026f9c>] (warn_slowpath_common) from [<c0026ffc>] (warn_slowpath_fmt+0x30/0x40)
  [<c0026ffc>] (warn_slowpath_fmt) from [<c00390b8>] (check_flush_dependency+0x138/0x144)
  [<c00390b8>] (check_flush_dependency) from [<c0039ca0>] (flush_work+0x50/0x15c)
  [<c0039ca0>] (flush_work) from [<c00c51b0>] (lru_add_drain_all+0x130/0x180)
  [<c00c51b0>] (lru_add_drain_all) from [<c00f728c>] (migrate_prep+0x8/0x10)
  [<c00f728c>] (migrate_prep) from [<c00bfbc4>] (alloc_contig_range+0xd8/0x338)
  [<c00bfbc4>] (alloc_contig_range) from [<c00f8f18>] (cma_alloc+0xe0/0x1ac)
  [<c00f8f18>] (cma_alloc) from [<c001cac4>] (__alloc_from_contiguous+0x38/0xd8)
  [<c001cac4>] (__alloc_from_contiguous) from [<c001ceb4>] (__dma_alloc+0x240/0x278)
  [<c001ceb4>] (__dma_alloc) from [<c001cf78>] (arm_dma_alloc+0x54/0x5c)
  [<c001cf78>] (arm_dma_alloc) from [<c0355ea4>] (dmam_alloc_coherent+0xc0/0xec)
  [<c0355ea4>] (dmam_alloc_coherent) from [<c039cc4c>] (ahci_port_start+0x150/0x1dc)
  [<c039cc4c>] (ahci_port_start) from [<c0384734>] (ata_host_start.part.3+0xc8/0x1c8)
  [<c0384734>] (ata_host_start.part.3) from [<c03898dc>] (ata_host_activate+0x50/0x148)
  [<c03898dc>] (ata_host_activate) from [<c039d558>] (ahci_host_activate+0x44/0x114)
  [<c039d558>] (ahci_host_activate) from [<c039f05c>] (ahci_platform_init_host+0x1d8/0x3c8)
  [<c039f05c>] (ahci_platform_init_host) from [<c039e6bc>] (tegra_ahci_probe+0x448/0x4e8)
  [<c039e6bc>] (tegra_ahci_probe) from [<c0347058>] (platform_drv_probe+0x50/0xac)
  [<c0347058>] (platform_drv_probe) from [<c03458cc>] (driver_probe_device+0x214/0x2c0)
  [<c03458cc>] (driver_probe_device) from [<c0343cc0>] (bus_for_each_drv+0x60/0x94)
  [<c0343cc0>] (bus_for_each_drv) from [<c03455d8>] (__device_attach+0xb0/0x114)
  [<c03455d8>] (__device_attach) from [<c0344ab8>] (bus_probe_device+0x84/0x8c)
  [<c0344ab8>] (bus_probe_device) from [<c0344f48>] (deferred_probe_work_func+0x68/0x98)
  [<c0344f48>] (deferred_probe_work_func) from [<c003b738>] (process_one_work+0x120/0x3f8)
  [<c003b738>] (process_one_work) from [<c003ba48>] (worker_thread+0x38/0x55c)
  [<c003ba48>] (worker_thread) from [<c0040f14>] (kthread+0xdc/0xf4)
  [<c0040f14>] (kthread) from [<c000f778>] (ret_from_fork+0x14/0x3c)

Fix it by marking workqueues created via create*_workqueue() with
__WQ_LEGACY and disabling flush dependency checks on them.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Thierry Reding <thierry.reding@gmail.com>
Link: http://lkml.kernel.org/g/20160126173843.GA11115@ulmo.nvidia.com
Fixes: fca839c00a12 ("workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue")
---
 include/linux/workqueue.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0e32bc71245e..ca73c503b92a 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -311,6 +311,7 @@ enum {
 
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
 	WQ_MAX_UNBOUND_PER_CPU	= 4,	  /* 4 * #cpus for unbound wq */
@@ -411,12 +412,12 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
-	alloc_workqueue("%s", WQ_MEM_RECLAIM, 1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
 #define create_freezable_workqueue(name)				\
-	alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \
-			1, (name))
+	alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND |	\
+			WQ_MEM_RECLAIM, 1, (name))
 #define create_singlethread_workqueue(name)				\
-	alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name)
+	alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name)
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
-- 
cgit v1.2.3


From 21603fc45bdef3696f45d80a1c9d730e06b925c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@higgsboson.tk>
Date: Wed, 27 Jan 2016 22:54:06 +0100
Subject: tcp: Change reference to experimental CWND RFC.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jörg Thalheim <joerg@higgsboson.tk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8ea19977ea53..f6f8f032c73e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -216,7 +216,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
-/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+/* TCP initial congestion window as per rfc6928 */
 #define TCP_INIT_CWND		10
 
 /* Bit Flags for sysctl_tcp_fastopen */
-- 
cgit v1.2.3


From 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Mon, 18 Jan 2016 14:09:27 -0600
Subject: target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors

In a couple places we are not converting to/from the Linux
block layer 512 bytes sectors.

1.

The request queue values and what we do are a mismatch of
things:

max_discard_sectors - This is in linux block layer 512 byte
sectors. We are just copying this to max_unmap_lba_count.

discard_granularity - This is in bytes. We are converting it
to Linux block layer 512 byte sectors.

discard_alignment - This is in bytes. We are just copying
this over.

The problem is that the core LIO code exports these values in
spc_emulate_evpd_b0 and we use them to test request arguments
in sbc_execute_unmap, but we never convert to the block size
we export to the initiator. If we are not using 512 byte sectors
then we are exporting the wrong values or are checks are off.
And, for the discard_alignment/bytes case we are just plain messed
up.

2.

blkdev_issue_discard's start and number of sector arguments
are supposed to be in linux block layer 512 byte sectors. We are
currently passing in the values we get from the initiator which
might be based on some other sector size.

There is a similar problem in iblock_execute_write_same where
the bio functions want values in 512 byte sectors but we are
passing in what we got from the initiator.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_backend.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 56cf8e485ef2..28ee5c2e6bcd 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -94,5 +94,8 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
 bool target_sense_desc_format(struct se_device *dev);
+sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
+bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+				       struct request_queue *q, int block_size);
 
 #endif /* TARGET_CORE_BACKEND_H */
-- 
cgit v1.2.3


From 6f21c96a78b835259546d8f3fb4edff0f651d478 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 29 Jan 2016 12:30:19 +0100
Subject: ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail()

The current implementation of ip6_dst_lookup_tail basically
ignore the egress ifindex match: if the saddr is set,
ip6_route_output() purposefully ignores flowi6_oif, due
to the commit d46a9d678e4c ("net: ipv6: Dont add RT6_LOOKUP_F_IFACE
flag if saddr set"), if the saddr is 'any' the first route lookup
in ip6_dst_lookup_tail fails, but upon failure a second lookup will
be performed with saddr set, thus ignoring the ifindex constraint.

This commit adds an output route lookup function variant, which
allows the caller to specify lookup flags, and modify
ip6_dst_lookup_tail() to enforce the ifindex match on the second
lookup via said helper.

ip6_route_output() becames now a static inline function build on
top of ip6_route_output_flags(); as a side effect, out-of-tree
modules need now a GPL license to access the output route lookup
functionality.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 877f682989b8..295d291269e2 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 
 void ip6_route_input(struct sk_buff *skb);
 
-struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
-				   struct flowi6 *fl6);
+struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
+					 struct flowi6 *fl6, int flags);
+
+static inline struct dst_entry *ip6_route_output(struct net *net,
+						 const struct sock *sk,
+						 struct flowi6 *fl6)
+{
+	return ip6_route_output_flags(net, sk, fl6, 0);
+}
+
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 				   int flags);
 
-- 
cgit v1.2.3


From 65f87ee71852a754f7981d0653e7136039b8798a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 25 Jan 2016 17:23:18 -0800
Subject: fs, block: force direct-I/O for dax-enabled block devices

Similar to the file I/O path, re-direct all I/O to the DAX path for I/O
to a block-device special file.  Both regular files and device special
files can use the common filp->f_mapping->host lookup to determing is
DAX is enabled.

Otherwise, we confuse the DAX code that does not expect to find live
data in the page cache:

    ------------[ cut here ]------------
    WARNING: CPU: 0 PID: 7676 at mm/filemap.c:217
    __delete_from_page_cache+0x9f6/0xb60()
    Modules linked in:
    CPU: 0 PID: 7676 Comm: a.out Not tainted 4.4.0+ #276
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
     00000000ffffffff ffff88006d3f7738 ffffffff82999e2d 0000000000000000
     ffff8800620a0000 ffffffff86473d20 ffff88006d3f7778 ffffffff81352089
     ffffffff81658d36 ffffffff86473d20 00000000000000d9 ffffea0000009d60
    Call Trace:
     [<     inline     >] __dump_stack lib/dump_stack.c:15
     [<ffffffff82999e2d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
     [<ffffffff81352089>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482
     [<ffffffff813522b9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:515
     [<ffffffff81658d36>] __delete_from_page_cache+0x9f6/0xb60 mm/filemap.c:217
     [<ffffffff81658fb2>] delete_from_page_cache+0x112/0x200 mm/filemap.c:244
     [<ffffffff818af369>] __dax_fault+0x859/0x1800 fs/dax.c:487
     [<ffffffff8186f4f6>] blkdev_dax_fault+0x26/0x30 fs/block_dev.c:1730
     [<     inline     >] wp_pfn_shared mm/memory.c:2208
     [<ffffffff816e9145>] do_wp_page+0xc85/0x14f0 mm/memory.c:2307
     [<     inline     >] handle_pte_fault mm/memory.c:3323
     [<     inline     >] __handle_mm_fault mm/memory.c:3417
     [<ffffffff816ecec3>] handle_mm_fault+0x2483/0x4640 mm/memory.c:3446
     [<ffffffff8127eff6>] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1238
     [<ffffffff8127f738>] trace_do_page_fault+0xe8/0x420 arch/x86/mm/fault.c:1331
     [<ffffffff812705c4>] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264
     [<ffffffff86338f78>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986
     [<ffffffff86336c36>] entry_SYSCALL_64_fastpath+0x16/0x7a
    arch/x86/entry/entry_64.S:185
    ---[ end trace dae21e0f85f1f98c ]---

Fixes: 5a023cdba50c ("block: enable dax for raw block devices")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Kirill A. Shutemov <kirill@shutemov.name>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1a2046275cdf..b10002d4a5f5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2907,7 +2907,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-	return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
-- 
cgit v1.2.3


From 9f4736fe7ca804aa79b5916221bb13dfc6221a0f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 28 Jan 2016 20:13:39 -0800
Subject: block: revert runtime dax control of the raw block device

Dynamically enabling DAX requires that the page cache first be flushed
and invalidated.  This must occur atomically with the change of DAX mode
otherwise we confuse the fsync/msync tracking and violate data
durability guarantees.  Eliminate the possibilty of DAX-disabled to
DAX-enabled transitions for now and revisit this for the next cycle.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/fs.h      | 3 ---
 include/uapi/linux/fs.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b10002d4a5f5..ae681002100a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -484,9 +484,6 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-	int			bd_map_count;
-#endif
 };
 
 /*
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 41e0433b4a83..149bec83a907 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -222,7 +222,6 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
-- 
cgit v1.2.3


From d1a5f2b4d8a125943dcb6b032fc7eaefc2c78296 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 28 Jan 2016 20:25:31 -0800
Subject: block: use DAX for partition table reads

Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8204c3dc3800..818e45078929 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+		sector_t n)
+{
+	return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
 				unsigned int flags, get_block_t, dax_iodone_t);
-- 
cgit v1.2.3


From 76e9f0ee52b0be5761e29847e0ef01f23f24f1df Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 22 Jan 2016 09:43:28 -0800
Subject: phys_to_pfn_t: use phys_addr_t

A dma_addr_t is potentially smaller than a phys_addr_t on some archs.
Don't truncate the address when doing the pfn conversion.

Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reported-by: Matthew Wilcox <willy@linux.intel.com>
[willy: fix pfn_t_to_phys as well]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/pfn_t.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 0703b5360d31..37448ab5fb5c 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
 	return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
 	return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
-- 
cgit v1.2.3


From 0f26922fe5dc5724b1adbbd54b21bad03590b4f3 Mon Sep 17 00:00:00 2001
From: zengtao <prime.zeng@huawei.com>
Date: Tue, 2 Feb 2016 11:38:34 +0800
Subject: cputime: Prevent 32bit overflow in time[val|spec]_to_cputime()

The datatype __kernel_time_t is u32 on 32bit platform, so its subject to
overflows in the timeval/timespec to cputime conversion.

Currently the following functions are affected:
1. setitimer()
2. timer_create/timer_settime()
3. sys_clock_nanosleep

This can happen on MIPS32 and ARM32 with "Full dynticks CPU time accounting"
enabled, which is required for CONFIG_NO_HZ_FULL.

Enforce u64 conversion to prevent the overflow.

Fixes: 31c1fc818715 ("ARM: Kconfig: allow full nohz CPU accounting")
Signed-off-by: zengtao <prime.zeng@huawei.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: <fweisbec@gmail.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1454384314-154784-1-git-send-email-prime.zeng@huawei.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/cputime_nsecs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index 0419485891f2..0f1c6f315cdc 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -75,7 +75,7 @@ typedef u64 __nocast cputime64_t;
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
 	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
@@ -91,7 +91,8 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
  */
 static inline cputime_t timeval_to_cputime(const struct timeval *val)
 {
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	u64 ret = (u64)val->tv_sec * NSEC_PER_SEC +
+			val->tv_usec * NSEC_PER_USEC;
 	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
-- 
cgit v1.2.3


From 4b0e4e4af6c6dc8354dcb72182d52c1bc55f12fc Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sat, 30 Jan 2016 07:59:32 +0200
Subject: drm: add helper to check for wc memory support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 include/drm/drm_cache.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 7bfb063029d8..461a0558bca4 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -35,4 +35,13 @@
 
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 
+static inline bool drm_arch_can_wc_memory(void)
+{
+#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
+	return false;
+#else
+	return true;
+#endif
+}
+
 #endif
-- 
cgit v1.2.3


From 2db8f9a1d86aa32a9cbf1a975882b4fa162f040f Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Tue, 26 Jan 2016 12:43:00 -0600
Subject: ACPI / CPPC: remove redundant mbox_send_message() declaration

Remove a redundant function declaration in cppc_acpi.h for
mbox_send_message().  That function is defined in mailbox_client.h,
which is already included.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/cppc_acpi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 717a29810473..dad8af3ebeb5 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -133,6 +133,5 @@ extern int acpi_get_psd_map(struct cpudata **);
 /* Methods to interact with the PCC mailbox controller. */
 extern struct mbox_chan *
 	pcc_mbox_request_channel(struct mbox_client *, unsigned int);
-extern int mbox_send_message(struct mbox_chan *chan, void *mssg);
 
 #endif /* _CPPC_ACPI_H*/
-- 
cgit v1.2.3


From 8244062ef1e54502ef55f54cced659913f244c3e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 3 Feb 2016 16:55:26 +1030
Subject: modules: fix longstanding /proc/kallsyms vs module insertion race.

For CONFIG_KALLSYMS, we keep two symbol tables and two string tables.
There's one full copy, marked SHF_ALLOC and laid out at the end of the
module's init section.  There's also a cut-down version that only
contains core symbols and strings, and lives in the module's core
section.

After module init (and before we free the module memory), we switch
the mod->symtab, mod->num_symtab and mod->strtab to point to the core
versions.  We do this under the module_mutex.

However, kallsyms doesn't take the module_mutex: it uses
preempt_disable() and rcu tricks to walk through the modules, because
it's used in the oops path.  It's also used in /proc/kallsyms.
There's nothing atomic about the change of these variables, so we can
get the old (larger!) num_symtab and the new symtab pointer; in fact
this is what I saw when trying to reproduce.

By grouping these variables together, we can use a
carefully-dereferenced pointer to ensure we always get one or the
other (the free of the module init section is already done in an RCU
callback, so that's safe).  We allocate the init one at the end of the
module init section, and keep the core one inside the struct module
itself (it could also have been allocated at the end of the module
core, but that's probably overkill).

Reported-by: Weilong Chen <chenweilong@huawei.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541
Cc: stable@kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index 4560d8f1545d..2bb0c3085706 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -324,6 +324,12 @@ struct module_layout {
 #define __module_layout_align
 #endif
 
+struct mod_kallsyms {
+	Elf_Sym *symtab;
+	unsigned int num_symtab;
+	char *strtab;
+};
+
 struct module {
 	enum module_state state;
 
@@ -405,15 +411,10 @@ struct module {
 #endif
 
 #ifdef CONFIG_KALLSYMS
-	/*
-	 * We keep the symbol and string tables for kallsyms.
-	 * The core_* fields below are temporary, loader-only (they
-	 * could really be discarded after module init).
-	 */
-	Elf_Sym *symtab, *core_symtab;
-	unsigned int num_symtab, core_num_syms;
-	char *strtab, *core_strtab;
-
+	/* Protected by RCU and/or module_mutex: use rcu_dereference() */
+	struct mod_kallsyms *kallsyms;
+	struct mod_kallsyms core_kallsyms;
+	
 	/* Section attributes */
 	struct module_sect_attrs *sect_attrs;
 
-- 
cgit v1.2.3


From 06ab30034ed9c200a570ab13c017bde248ddb2a6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 31 Jan 2016 11:57:41 +0100
Subject: ALSA: rawmidi: Make snd_rawmidi_transmit() race-free

A kernel WARNING in snd_rawmidi_transmit_ack() is triggered by
syzkaller fuzzer:
  WARNING: CPU: 1 PID: 20739 at sound/core/rawmidi.c:1136
Call Trace:
 [<     inline     >] __dump_stack lib/dump_stack.c:15
 [<ffffffff82999e2d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
 [<ffffffff81352089>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482
 [<ffffffff813522b9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:515
 [<ffffffff84f80bd5>] snd_rawmidi_transmit_ack+0x275/0x400 sound/core/rawmidi.c:1136
 [<ffffffff84fdb3c1>] snd_virmidi_output_trigger+0x4b1/0x5a0 sound/core/seq/seq_virmidi.c:163
 [<     inline     >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150
 [<ffffffff84f87ed9>] snd_rawmidi_kernel_write1+0x549/0x780 sound/core/rawmidi.c:1223
 [<ffffffff84f89fd3>] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1273
 [<ffffffff817b0323>] __vfs_write+0x113/0x480 fs/read_write.c:528
 [<ffffffff817b1db7>] vfs_write+0x167/0x4a0 fs/read_write.c:577
 [<     inline     >] SYSC_write fs/read_write.c:624
 [<ffffffff817b50a1>] SyS_write+0x111/0x220 fs/read_write.c:616
 [<ffffffff86336c36>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185

Also a similar warning is found but in another path:
Call Trace:
 [<     inline     >] __dump_stack lib/dump_stack.c:15
 [<ffffffff82be2c0d>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
 [<ffffffff81355139>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:482
 [<ffffffff81355369>] warn_slowpath_null+0x29/0x30 kernel/panic.c:515
 [<ffffffff8527e69a>] rawmidi_transmit_ack+0x24a/0x3b0 sound/core/rawmidi.c:1133
 [<ffffffff8527e851>] snd_rawmidi_transmit_ack+0x51/0x80 sound/core/rawmidi.c:1163
 [<ffffffff852d9046>] snd_virmidi_output_trigger+0x2b6/0x570 sound/core/seq/seq_virmidi.c:185
 [<     inline     >] snd_rawmidi_output_trigger sound/core/rawmidi.c:150
 [<ffffffff85285a0b>] snd_rawmidi_kernel_write1+0x4bb/0x760 sound/core/rawmidi.c:1252
 [<ffffffff85287b73>] snd_rawmidi_write+0x543/0xb30 sound/core/rawmidi.c:1302
 [<ffffffff817ba5f3>] __vfs_write+0x113/0x480 fs/read_write.c:528
 [<ffffffff817bc087>] vfs_write+0x167/0x4a0 fs/read_write.c:577
 [<     inline     >] SYSC_write fs/read_write.c:624
 [<ffffffff817bf371>] SyS_write+0x111/0x220 fs/read_write.c:616
 [<ffffffff86660276>] entry_SYSCALL_64_fastpath+0x16/0x7a arch/x86/entry/entry_64.S:185

In the former case, the reason is that virmidi has an open code
calling snd_rawmidi_transmit_ack() with the value calculated outside
the spinlock.   We may use snd_rawmidi_transmit() in a loop just for
consuming the input data, but even there, there is a race between
snd_rawmidi_transmit_peek() and snd_rawmidi_tranmit_ack().

Similarly in the latter case, it calls snd_rawmidi_transmit_peek() and
snd_rawmidi_tranmit_ack() separately without protection, so they are
racy as well.

The patch tries to address these issues by the following ways:
- Introduce the unlocked versions of snd_rawmidi_transmit_peek() and
  snd_rawmidi_transmit_ack() to be called inside the explicit lock.
- Rewrite snd_rawmidi_transmit() to be race-free (the former case).
- Make the split calls (the latter case) protected in the rawmidi spin
  lock.

BugLink: http://lkml.kernel.org/r/CACT4Y+YPq1+cYLkadwjWa5XjzF1_Vki1eHnVn-Lm0hzhSpu5PA@mail.gmail.com
BugLink: http://lkml.kernel.org/r/CACT4Y+acG4iyphdOZx47Nyq_VHGbpJQK-6xNpiqUjaZYqsXOGw@mail.gmail.com
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/rawmidi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index fdabbb4ddba9..f730b91e472f 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -167,6 +167,10 @@ int snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
 int snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream, int count);
 int snd_rawmidi_transmit(struct snd_rawmidi_substream *substream,
 			 unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_peek(struct snd_rawmidi_substream *substream,
+			      unsigned char *buffer, int count);
+int __snd_rawmidi_transmit_ack(struct snd_rawmidi_substream *substream,
+			       int count);
 
 /* main midi functions */
 
-- 
cgit v1.2.3


From a3d0a918502cc73af4f60da2cc4c5cac5573f183 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 2 Feb 2016 16:57:08 -0800
Subject: thp: make split_queue per-node

Andrea Arcangeli suggested to make split queue per-node to improve
scalability.  Let's do it.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 33bb1b19273e..7b6c2cfee390 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -682,6 +682,12 @@ typedef struct pglist_data {
 	 */
 	unsigned long first_deferred_pfn;
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	unsigned long split_queue_len;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
-- 
cgit v1.2.3


From 65376df582174ffcec9e6471bf5b0dd79ba05e4a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Feb 2016 16:57:29 -0800
Subject: proc: revert /proc/<pid>/maps [stack:TID] annotation

Commit b76437579d13 ("procfs: mark thread stack correctly in
proc/<pid>/maps") added [stack:TID] annotation to /proc/<pid>/maps.

Finding the task of a stack VMA requires walking the entire thread list,
turning this into quadratic behavior: a thousand threads means a
thousand stacks, so the rendering of /proc/<pid>/maps needs to look at a
million combinations.

The cost is not in proportion to the usefulness as described in the
patch.

Drop the [stack:TID] annotation to make /proc/<pid>/maps (and
/proc/<pid>/numa_maps) usable again for higher thread counts.

The [stack] annotation inside /proc/<pid>/task/<tid>/maps is retained, as
identifying the stack VMA there is an O(1) operation.

Siddesh said:
 "The end users needed a way to identify thread stacks programmatically and
  there wasn't a way to do that.  I'm afraid I no longer remember (or have
  access to the resources that would aid my memory since I changed
  employers) the details of their requirement.  However, I did do this on my
  own time because I thought it was an interesting project for me and nobody
  really gave any feedback then as to its utility, so as far as I am
  concerned you could roll back the main thread maps information since the
  information is available in the thread-specific files"

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
Cc: Shaohua Li <shli@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1cd22f2df1a..0b50d7848e3a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1341,8 +1341,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
 		!vma_growsup(vma->vm_next, addr);
 }
 
-extern struct task_struct *task_of_stack(struct task_struct *task,
-				struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-- 
cgit v1.2.3


From c792e8240338e41eda4d06a3a71a7bb7af4e6156 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 2 Feb 2016 16:57:38 -0800
Subject: mm: memcontrol: drop superfluous entry in the per-memcg stats array

MEM_CGROUP_STAT_NSTATS is just a delimiter for cgroup1 statistics, not
an actual array entry.  Reuse it for the first cgroup2 stat entry, like
in the event array.

Fixes: b2807f07f4f8 ("mm: memcontrol: add "sock" to cgroup2 memory.stat")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9ae48d4aeb5e..792c8981e633 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_SWAP,		/* # of pages, swapped out */
 	MEM_CGROUP_STAT_NSTATS,
 	/* default hierarchy stats */
-	MEMCG_SOCK,
+	MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
 	MEMCG_NR_STAT,
 };
 
-- 
cgit v1.2.3


From 30bdbb78009e67767983085e302bec6d97afc679 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Tue, 2 Feb 2016 16:57:46 -0800
Subject: mm: polish virtual memory accounting

* add VM_STACK as alias for VM_GROWSUP/DOWN depending on architecture
* always account VMAs with flag VM_STACK as stack (as it was before)
* cleanup classifying helpers
* update comments and documentation

Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Tested-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 6 ++++--
 include/linux/mm_types.h | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0b50d7848e3a..516e14944339 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS	(VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK	VM_GROWSUP
 #else
-#define VM_STACK_FLAGS	(VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK	VM_GROWSDOWN
 #endif
 
+#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d3ebb9d21a53..624b78b848b8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -424,9 +424,9 @@ struct mm_struct {
 	unsigned long total_vm;		/* Total pages mapped */
 	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
 	unsigned long pinned_vm;	/* Refcount permanently increased */
-	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
-	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
+	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+	unsigned long stack_vm;		/* VM_STACK */
 	unsigned long def_flags;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
-- 
cgit v1.2.3


From 46437f9a554fbe3e110580ca08ab703b59f2f95a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Tue, 2 Feb 2016 16:57:52 -0800
Subject: radix-tree: fix race in gang lookup

If the indirect_ptr bit is set on a slot, that indicates we need to redo
the lookup.  Introduce a new function radix_tree_iter_retry() which
forces the loop to retry the lookup by setting 'slot' to NULL and
turning the iterator back to point at the problematic entry.

This is a pretty rare problem to hit at the moment; the lookup has to
race with a grow of the radix tree from a height of 0.  The consequences
of hitting this race are that gang lookup could return a pointer to a
radix_tree_node instead of a pointer to whatever the user had inserted
in the tree.

Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator")
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ohad Ben-Cohen <ohad@wizery.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7c88ad156a29..00b17c526c1f 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -378,6 +378,22 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 void **radix_tree_next_chunk(struct radix_tree_root *root,
 			     struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:	iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+	iter->next_index = iter->index;
+	return NULL;
+}
+
 /**
  * radix_tree_chunk_size - get current chunk size
  *
-- 
cgit v1.2.3


From febe562c20dfa8f33bee7d419c6b517986a5aa33 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 11 Jan 2016 21:31:09 -0800
Subject: target: Fix LUN_RESET active I/O handling for ACK_KREF

This patch fixes a NULL pointer se_cmd->cmd_kref < 0
refcount bug during TMR LUN_RESET with active se_cmd
I/O, that can be triggered during se_cmd descriptor
shutdown + release via core_tmr_drain_state_list() code.

To address this bug, add common __target_check_io_state()
helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE
checking, and set CMD_T_ABORTED + obtain ->cmd_kref for
both cases ahead of last target_put_sess_cmd() after
TFO->aborted_task() -> transport_cmd_finish_abort()
callback has completed.

It also introduces SCF_ACK_KREF to determine when
transport_cmd_finish_abort() needs to drop the second
extra reference, ahead of calling target_put_sess_cmd()
for the final kref_put(&se_cmd->cmd_kref).

It also updates transport_cmd_check_stop() to avoid
holding se_cmd->t_state_lock while dropping se_cmd
device state via target_remove_from_state_list(), now
that core_tmr_drain_state_list() is holding the
se_device lock while checking se_cmd state from
within TMR logic.

Finally, move transport_put_cmd() release of SGL +
TMR + extended CDB memory into target_free_cmd_mem()
in order to avoid potential resource leaks in TMR
ABORT_TASK + LUN_RESET code-paths.  Also update
target_release_cmd_kref() accordingly.

Reviewed-by: Quinn Tran <quinn.tran@qlogic.com>
Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 5d82816cc4e3..1a76726c45a2 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -140,6 +140,7 @@ enum se_cmd_flags_table {
 	SCF_COMPARE_AND_WRITE		= 0x00080000,
 	SCF_COMPARE_AND_WRITE_POST	= 0x00100000,
 	SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000,
+	SCF_ACK_KREF			= 0x00400000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
-- 
cgit v1.2.3


From fac710e45d0b12443acd4d905c9acec27ab4ca14 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 27 Jan 2016 10:08:42 -0200
Subject: [media] vb2: fix nasty vb2_thread regression

The vb2_thread implementation was made generic and was moved from
videobuf2-v4l2.c to videobuf2-core.c in commit af3bac1a. Unfortunately
that clearly was never tested since it broke read() causing NULL address
references.

The root cause was confused handling of vb2_buffer vs v4l2_buffer (the pb
pointer in various core functions).

The v4l2_buffer no longer exists after moving the code into the core and
it is no longer needed. However, the vb2_thread code passed a pointer to
a vb2_buffer to the core functions were a v4l2_buffer pointer was expected
and vb2_thread expected that the vb2_buffer fields would be filled in
correctly.

This is obviously wrong since v4l2_buffer != vb2_buffer. Note that the
pb pointer is a void pointer, so no type-checking took place.

This patch fixes this problem:

1) allow pb to be NULL for vb2_core_(d)qbuf. The vb2_thread code will use
   a NULL pointer here since they don't care about v4l2_buffer anyway.
2) let vb2_core_dqbuf pass back the index of the received buffer. This is
   all vb2_thread needs: this index is the index into the q->bufs array
   and vb2_thread just gets the vb2_buffer from there.
3) the fileio->b pointer (that originally contained a v4l2_buffer) is
   removed altogether since it is no longer needed.

Tested with vivid and the cobalt driver.

Cc: stable@vger.kernel.org # Kernel >= 4.3
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: Matthias Schwarzott <zzam@gentoo.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/media/videobuf2-core.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index ef03ae56b1c1..8a0f55b6c2ba 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -533,7 +533,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
 		const unsigned int requested_sizes[]);
 int vb2_core_prepare_buf(struct vb2_queue *q, unsigned int index, void *pb);
 int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb);
-int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking);
+int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
+		   bool nonblocking);
 
 int vb2_core_streamon(struct vb2_queue *q, unsigned int type);
 int vb2_core_streamoff(struct vb2_queue *q, unsigned int type);
-- 
cgit v1.2.3


From dc6ae6d8e7726bad4f1c87244b49cac851746c65 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sun, 31 Jan 2016 14:36:07 +0100
Subject: crush: add chooseleaf_stable tunable

Add a tunable to fix the bug that chooseleaf may cause unnecessary pg
migrations when some device fails.

Reflects ceph.git commit fdb3f664448e80d984470f32f04e2e6f03ab52ec.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/crush/crush.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 48b49305716b..be8f12b8f195 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -59,7 +59,8 @@ enum {
 	CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
 	CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
 	CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
-	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+	CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
+	CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
 };
 
 /*
@@ -205,6 +206,11 @@ struct crush_map {
 	 * mappings line up a bit better with previous mappings. */
 	__u8 chooseleaf_vary_r;
 
+	/* if true, it makes chooseleaf firstn to return stable results (if
+	 * no local retry) so that data migrations would be optimal when some
+	 * device fails. */
+	__u8 chooseleaf_stable;
+
 #ifndef __KERNEL__
 	/*
 	 * version 0 (original) of straw_calc has various flaws.  version 1
-- 
cgit v1.2.3


From 97db9a88186e3a7d3a1942370c836bf221d3ab90 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 1 Feb 2016 17:08:33 +0100
Subject: libceph: advertise support for TUNABLES5

Add TUNABLES5 feature (chooseleaf_stable tunable) to a set of features
supported by default.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index f89b31d45cc8..c3b211c9fe83 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -63,6 +63,16 @@
 #define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
 // duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
 #define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
+#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
+#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
+#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
+#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
+#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
+#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
+#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
+#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
+#define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -108,7 +118,8 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
-	 CEPH_FEATURE_CRUSH_V4)
+	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_CRUSH_TUNABLES5)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-- 
cgit v1.2.3


From b0b31a8ffe54abf0a455bcaee54dd92f08817164 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 3 Feb 2016 15:25:48 +0100
Subject: libceph: MOSDOpReply v7 encoding

Empty request_redirect_t (struct ceph_request_redirect in the kernel
client) is now encoded with a bool.  NEW_OSDOPREPLY_ENCODING feature
bit overlaps with already supported CRUSH_TUNABLES5.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index c3b211c9fe83..c1ef6f14e7be 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -73,6 +73,8 @@
 #define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
 #define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
 #define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
+// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
+#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -119,7 +121,8 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
-	 CEPH_FEATURE_CRUSH_TUNABLES5)
+	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-- 
cgit v1.2.3


From 0fb5b1fb30fba3671dd5b1489d78e93e08d62e4e Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Thu, 4 Feb 2016 00:52:12 -0500
Subject: block/sd: Return -EREMOTEIO when WRITE SAME and DISCARD are disabled

When a storage device rejects a WRITE SAME command we will disable write
same functionality for the device and return -EREMOTEIO to the block
layer. -EREMOTEIO will in turn prevent DM from retrying the I/O and/or
failing the path.

Yiwen Jiang discovered a small race where WRITE SAME requests issued
simultaneously would cause -EIO to be returned. This happened because
any requests being prepared after WRITE SAME had been disabled for the
device caused us to return BLKPREP_KILL. The latter caused the block
layer to return -EIO upon completion.

To overcome this we introduce BLKPREP_INVALID which indicates that this
is an invalid request for the device. blk_peek_request() is modified to
return -EREMOTEIO in that case.

Reported-by: Yiwen Jiang <jiangyiwen@huawei.com>
Suggested-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Hannes Reinicke <hare@suse.de>
Reviewed-by: Ewan Milne <emilne@redhat.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/blkdev.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d372ea87ead5..a9b643a0647f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -681,9 +681,12 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 /*
  * q->prep_rq_fn return values
  */
-#define BLKPREP_OK		0	/* serve it */
-#define BLKPREP_KILL		1	/* fatal error, kill */
-#define BLKPREP_DEFER		2	/* leave on queue */
+enum {
+	BLKPREP_OK,		/* serve it */
+	BLKPREP_KILL,		/* fatal error, kill, return -EIO */
+	BLKPREP_DEFER,		/* leave on queue */
+	BLKPREP_INVALID,	/* invalid command, kill, return -EREMOTEIO */
+};
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
 
-- 
cgit v1.2.3


From 64566b5e767f9bc3161055ca1b443a51afb52aad Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Fri, 22 Jan 2016 17:07:25 -0500
Subject: drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil

drm_fixp_from_fraction allows us to create a fixed point directly
from a fraction, rather than creating fixed point values and dividing
later. This avoids overflow of our 64 bit value for large numbers.

drm_fixp2int_ceil allows us to return the ceiling of our fixed point
value.

[airlied: squash Jordan's fix]
32-bit-build-fix: Jordan Lazare <Jordan.Lazare@amd.com>
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Cc: stable@vger.kernel.org
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_fixed.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index d639049a613d..553210c02ee0 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B)
 #define DRM_FIXED_ONE		(1ULL << DRM_FIXED_POINT)
 #define DRM_FIXED_DECIMAL_MASK	(DRM_FIXED_ONE - 1)
 #define DRM_FIXED_DIGITS_MASK	(~DRM_FIXED_DECIMAL_MASK)
+#define DRM_FIXED_EPSILON	1LL
+#define DRM_FIXED_ALMOST_ONE	(DRM_FIXED_ONE - DRM_FIXED_EPSILON)
 
 static inline s64 drm_int2fixp(int a)
 {
 	return ((s64)a) << DRM_FIXED_POINT;
 }
 
-static inline int drm_fixp2int(int64_t a)
+static inline int drm_fixp2int(s64 a)
 {
 	return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline unsigned drm_fixp_msbset(int64_t a)
+static inline int drm_fixp2int_ceil(s64 a)
+{
+	if (a > 0)
+		return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE);
+	else
+		return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE);
+}
+
+static inline unsigned drm_fixp_msbset(s64 a)
 {
 	unsigned shift, sign = (a >> 63) & 1;
 
@@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b)
 	return result;
 }
 
+static inline s64 drm_fixp_from_fraction(s64 a, s64 b)
+{
+	s64 res;
+	bool a_neg = a < 0;
+	bool b_neg = b < 0;
+	u64 a_abs = a_neg ? -a : a;
+	u64 b_abs = b_neg ? -b : b;
+	u64 rem;
+
+	/* determine integer part */
+	u64 res_abs  = div64_u64_rem(a_abs, b_abs, &rem);
+
+	/* determine fractional part */
+	{
+		u32 i = DRM_FIXED_POINT;
+
+		do {
+			rem <<= 1;
+			res_abs <<= 1;
+			if (rem >= b_abs) {
+				res_abs |= 1;
+				rem -= b_abs;
+			}
+		} while (--i != 0);
+	}
+
+	/* round up LSB */
+	{
+		u64 summand = (rem << 1) >= b_abs;
+
+		res_abs += summand;
+	}
+
+	res = (s64) res_abs;
+	if (a_neg ^ b_neg)
+		res = -res;
+	return res;
+}
+
 static inline s64 drm_fixp_exp(s64 x)
 {
 	s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000);
-- 
cgit v1.2.3


From 5e93b8208d3c419b515fb75e2601931c027e12ab Mon Sep 17 00:00:00 2001
From: Hersen Wu <hersenxs.wu@amd.com>
Date: Fri, 22 Jan 2016 17:07:28 -0500
Subject: drm/dp/mst: move GUID storage from mgr, port to only mst branch

Previous implementation does not handle case below: boot up one MST branch
to DP connector of ASIC. After boot up, hot plug 2nd MST branch to DP output
of 1st MST, GUID is not created for 2nd MST branch. When downstream port of
2nd MST branch send upstream request, it fails because 2nd MST branch GUID
is not available.

New Implementation: only create GUID for MST branch and save it within Branch.

Signed-off-by: Hersen Wu <hersenxs.wu@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Cc: stable@vger.kernel.org
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_dp_mst_helper.h | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 24ab1787b771..fdb47051d549 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -44,8 +44,6 @@ struct drm_dp_vcpi {
 /**
  * struct drm_dp_mst_port - MST port
  * @kref: reference count for this port.
- * @guid_valid: for DP 1.2 devices if we have validated the GUID.
- * @guid: guid for DP 1.2 device on this port.
  * @port_num: port number
  * @input: if this port is an input port.
  * @mcs: message capability status - DP 1.2 spec.
@@ -70,10 +68,6 @@ struct drm_dp_vcpi {
 struct drm_dp_mst_port {
 	struct kref kref;
 
-	/* if dpcd 1.2 device is on this port - its GUID info */
-	bool guid_valid;
-	u8 guid[16];
-
 	u8 port_num;
 	bool input;
 	bool mcs;
@@ -110,10 +104,12 @@ struct drm_dp_mst_port {
  * @tx_slots: transmission slots for this device.
  * @last_seqno: last sequence number used to talk to this.
  * @link_address_sent: if a link address message has been sent to this device yet.
+ * @guid: guid for DP 1.2 branch device. port under this branch can be
+ * identified by port #.
  *
  * This structure represents an MST branch device, there is one
- * primary branch device at the root, along with any others connected
- * to downstream ports
+ * primary branch device at the root, along with any other branches connected
+ * to downstream port of parent branches.
  */
 struct drm_dp_mst_branch {
 	struct kref kref;
@@ -132,6 +128,9 @@ struct drm_dp_mst_branch {
 	struct drm_dp_sideband_msg_tx *tx_slots[2];
 	int last_seqno;
 	bool link_address_sent;
+
+	/* global unique identifier to identify branch devices */
+	u8 guid[16];
 };
 
 
@@ -406,11 +405,9 @@ struct drm_dp_payload {
  * @conn_base_id: DRM connector ID this mgr is connected to.
  * @down_rep_recv: msg receiver state for down replies.
  * @up_req_recv: msg receiver state for up requests.
- * @lock: protects mst state, primary, guid, dpcd.
+ * @lock: protects mst state, primary, dpcd.
  * @mst_state: if this manager is enabled for an MST capable port.
  * @mst_primary: pointer to the primary branch device.
- * @guid_valid: GUID valid for the primary branch device.
- * @guid: GUID for primary port.
  * @dpcd: cache of DPCD for primary port.
  * @pbn_div: PBN to slots divisor.
  *
@@ -432,13 +429,11 @@ struct drm_dp_mst_topology_mgr {
 	struct drm_dp_sideband_msg_rx up_req_recv;
 
 	/* pointer to info about the initial MST device */
-	struct mutex lock; /* protects mst_state + primary + guid + dpcd */
+	struct mutex lock; /* protects mst_state + primary + dpcd */
 
 	bool mst_state;
 	struct drm_dp_mst_branch *mst_primary;
-	/* primary MST device GUID */
-	bool guid_valid;
-	u8 guid[16];
+
 	u8 dpcd[DP_RECEIVER_CAP_SIZE];
 	u8 sink_count;
 	int pbn_div;
-- 
cgit v1.2.3


From 0f4a943168f31d29a1701908931acaba518b131a Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 19 Jan 2016 15:23:02 -0800
Subject: target: Fix remote-port TMR ABORT + se_cmd fabric stop

To address the bug where fabric driver level shutdown
of se_cmd occurs at the same time when TMR CMD_T_ABORTED
is happening resulting in a -1 ->cmd_kref, this patch
adds a CMD_T_FABRIC_STOP bit that is used to determine
when TMR + driver I_T nexus shutdown is happening
concurrently.

It changes target_sess_cmd_list_set_waiting() to obtain
se_cmd->cmd_kref + set CMD_T_FABRIC_STOP, and drop local
reference in target_wait_for_sess_cmds() and invoke extra
target_put_sess_cmd() during Task Aborted Status (TAS)
when necessary.

Also, it adds a new target_wait_free_cmd() wrapper around
transport_wait_for_tasks() for the special case within
transport_generic_free_cmd() to set CMD_T_FABRIC_STOP,
and is now aware of CMD_T_ABORTED + CMD_T_TAS status
bits to know when an extra transport_put_cmd() during
TAS is required.

Note transport_generic_free_cmd() is expected to block on
cmd->cmd_wait_comp in order to follow what iscsi-target
expects during iscsi_conn context se_cmd shutdown.

Cc: Quinn Tran <quinn.tran@qlogic.com>
Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
---
 include/target/target_core_base.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 1a76726c45a2..1579539e5669 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -493,6 +493,8 @@ struct se_cmd {
 #define CMD_T_DEV_ACTIVE	(1 << 7)
 #define CMD_T_REQUEST_STOP	(1 << 8)
 #define CMD_T_BUSY		(1 << 9)
+#define CMD_T_TAS		(1 << 10)
+#define CMD_T_FABRIC_STOP	(1 << 11)
 	spinlock_t		t_state_lock;
 	struct kref		cmd_kref;
 	struct completion	t_transport_stop_comp;
-- 
cgit v1.2.3


From 080fe2068e1c7f19f565b30b78baf78edf16a980 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 5 Feb 2016 15:36:41 -0800
Subject: mm, hugetlb: don't require CMA for runtime gigantic pages

Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation
at runtime") has added the runtime gigantic page allocation via
alloc_contig_range(), making this support available only when CONFIG_CMA
is enabled.  Because it doesn't depend on MIGRATE_CMA pageblocks and the
associated infrastructure, it is possible with few simple adjustments to
require only CONFIG_MEMORY_ISOLATION instead of full CONFIG_CMA.

After this patch, alloc_contig_range() and related functions are
available and used for gigantic pages with just CONFIG_MEMORY_ISOLATION
enabled.  Note CONFIG_CMA selects CONFIG_MEMORY_ISOLATION.  This allows
supporting runtime gigantic pages without the CMA-specific checks in
page allocator fastpaths.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 28ad5f6494b0..af1f2b24bbe4 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -547,16 +547,16 @@ static inline bool pm_suspended_storage(void)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_CMA
-
+#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range(unsigned long start, unsigned long end,
 			      unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+#endif
 
+#ifdef CONFIG_CMA
 /* CMA stuff */
 extern void init_cma_reserved_pageblock(struct page *page);
-
 #endif
 
 #endif /* __LINUX_GFP_H */
-- 
cgit v1.2.3


From 12352d3cae2cebe18805a91fab34b534d7444231 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Fri, 5 Feb 2016 15:36:50 -0800
Subject: mm: replace vma_lock_anon_vma with anon_vma_lock_read/write

Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if
anon_vma appeared between lock and unlock.  We have to check anon_vma
first or call anon_vma_prepare() to be sure that it's here.  There are
only few users of these legacy helpers.  Let's get rid of them.

This patch fixes anon_vma lock imbalance in validate_mm().  Write lock
isn't required here, read lock is enough.

And reorders expand_downwards/expand_upwards: security_mmap_addr() and
wrapping-around check don't have to be under anon vma lock.

Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bdf597c4f0be..a07f42bedda3 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -109,20 +109,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma)
 		__put_anon_vma(anon_vma);
 }
 
-static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	if (anon_vma)
-		down_write(&anon_vma->root->rwsem);
-}
-
-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	if (anon_vma)
-		up_write(&anon_vma->root->rwsem);
-}
-
 static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
-- 
cgit v1.2.3


From 732042821cfa106b3c20b9780e4c60fee9d68900 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Fri, 5 Feb 2016 15:37:01 -0800
Subject: radix-tree: fix oops after radix_tree_iter_retry

Helper radix_tree_iter_retry() resets next_index to the current index.
In following radix_tree_next_slot current chunk size becomes zero.  This
isn't checked and it tries to dereference null pointer in slot.

Tagged iterator is fine because retry happens only at slot 0 where tag
bitmask in iter->tags is filled with single bit.

Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup")
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ohad Ben-Cohen <ohad@wizery.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 00b17c526c1f..f54be7082207 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -400,7 +400,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter)
  * @iter:	pointer to radix tree iterator
  * Returns:	current chunk size
  */
-static __always_inline unsigned
+static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
 	return iter->next_index - iter->index;
@@ -434,9 +434,9 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 			return slot + offset + 1;
 		}
 	} else {
-		unsigned size = radix_tree_chunk_size(iter) - 1;
+		long size = radix_tree_chunk_size(iter);
 
-		while (size--) {
+		while (--size > 0) {
 			slot++;
 			iter->index++;
 			if (likely(*slot))
-- 
cgit v1.2.3


From 57dae19065bde296dfdf08b8e46c102a671ff741 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 29 Jan 2016 00:07:25 -0800
Subject: target: Drop legacy se_cmd->task_stop_comp + REQUEST_STOP usage

With CMD_T_FABRIC_STOP + se_cmd->cmd_wait_set usage in place,
go ahead and drop left-over CMD_T_REQUEST_STOP checks in
target_complete_cmd() and unused target_stop_cmd().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Quinn Tran <quinn.tran@qlogic.com>
Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Andy Grover <agrover@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 1579539e5669..d71a3ead9e63 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -491,7 +491,6 @@ struct se_cmd {
 #define CMD_T_SENT		(1 << 4)
 #define CMD_T_STOP		(1 << 5)
 #define CMD_T_DEV_ACTIVE	(1 << 7)
-#define CMD_T_REQUEST_STOP	(1 << 8)
 #define CMD_T_BUSY		(1 << 9)
 #define CMD_T_TAS		(1 << 10)
 #define CMD_T_FABRIC_STOP	(1 << 11)
@@ -514,9 +513,6 @@ struct se_cmd {
 
 	struct list_head	state_list;
 
-	/* old task stop completion, consider merging with some of the above */
-	struct completion	task_stop_comp;
-
 	/* backend private data */
 	void			*priv;
 
-- 
cgit v1.2.3


From 1f55c718c290616889c04946864a13ef30f64929 Mon Sep 17 00:00:00 2001
From: "Herton R. Krzesinski" <herton@redhat.com>
Date: Thu, 14 Jan 2016 17:56:58 -0200
Subject: pty: make sure super_block is still valid in final /dev/tty close

Considering current pty code and multiple devpts instances, it's possible
to umount a devpts file system while a program still has /dev/tty opened
pointing to a previosuly closed pty pair in that instance. In the case all
ptmx and pts/N files are closed, umount can be done. If the program closes
/dev/tty after umount is done, devpts_kill_index will use now an invalid
super_block, which was already destroyed in the umount operation after
running ->kill_sb. This is another "use after free" type of issue, but now
related to the allocated super_block instance.

To avoid the problem (warning at ida_remove and potential crashes) for
this specific case, I added two functions in devpts which grabs additional
references to the super_block, which pty code now uses so it makes sure
the super block structure is still valid until pty shutdown is done.
I also moved the additional inode references to the same functions, which
also covered similar case with inode being freed before /dev/tty final
close/shutdown.

Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
Cc: stable@vger.kernel.org # 2.6.29+
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/devpts_fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index 251a2090a554..e0ee0b3000b2 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -19,6 +19,8 @@
 
 int devpts_new_index(struct inode *ptmx_inode);
 void devpts_kill_index(struct inode *ptmx_inode, int idx);
+void devpts_add_ref(struct inode *ptmx_inode);
+void devpts_del_ref(struct inode *ptmx_inode);
 /* mknod in devpts */
 struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
 		void *priv);
@@ -32,6 +34,8 @@ void devpts_pty_kill(struct inode *inode);
 /* Dummy stubs in the no-pty case */
 static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
 static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
+static inline void devpts_add_ref(struct inode *ptmx_inode) { }
+static inline void devpts_del_ref(struct inode *ptmx_inode) { }
 static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
 		dev_t device, int index, void *priv)
 {
-- 
cgit v1.2.3


From 415e3d3e90ce9e18727e8843ae343eda5a58fad6 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 3 Feb 2016 02:11:03 +0100
Subject: unix: correctly track in-flight fds in sending process user_struct

The commit referenced in the Fixes tag incorrectly accounted the number
of in-flight fds over a unix domain socket to the original opener
of the file-descriptor. This allows another process to arbitrary
deplete the original file-openers resource limit for the maximum of
open files. Instead the sending processes and its struct cred should
be credited.

To do so, we add a reference counted struct user_struct pointer to the
scm_fp_list and use it to account for the number of inflight unix fds.

Fixes: 712f4aad406bb1 ("unix: properly account for FDs passed over unix sockets")
Reported-by: David Herrmann <dh.herrmann@gmail.com>
Cc: David Herrmann <dh.herrmann@gmail.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 4 ++--
 include/net/scm.h     | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2a91a0561a47..9b4c418bebd8 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -6,8 +6,8 @@
 #include <linux/mutex.h>
 #include <net/sock.h>
 
-void unix_inflight(struct file *fp);
-void unix_notinflight(struct file *fp);
+void unix_inflight(struct user_struct *user, struct file *fp);
+void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
diff --git a/include/net/scm.h b/include/net/scm.h
index 262532d111f5..59fa93c01d2a 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -21,6 +21,7 @@ struct scm_creds {
 struct scm_fp_list {
 	short			count;
 	short			max;
+	struct user_struct	*user;
 	struct file		*fp[SCM_MAX_FD];
 };
 
-- 
cgit v1.2.3


From 9cf7490360bf2c46a16b7525f899e4970c5fc144 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 2 Feb 2016 19:31:12 -0800
Subject: tcp: do not drop syn_recv on all icmp reports

Petr Novopashenniy reported that ICMP redirects on SYN_RECV sockets
were leading to RST.

This is of course incorrect.

A specific list of ICMP messages should be able to drop a SYN_RECV.

For instance, a REDIRECT on SYN_RECV shall be ignored, as we do
not hold a dst per SYN_RECV pseudo request.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111751
Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Reported-by: Petr Novopashenniy <pety@rusnet.ru>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f6f8f032c73e..ae6468f5c9f3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -447,7 +447,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
-void tcp_req_err(struct sock *sk, u32 seq);
+void tcp_req_err(struct sock *sk, u32 seq, bool abort);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
 				      struct request_sock *req,
-- 
cgit v1.2.3


From 5f74f82ea34c0da80ea0b49192bb5ea06e063593 Mon Sep 17 00:00:00 2001
From: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
Date: Wed, 3 Feb 2016 09:26:57 +0100
Subject: net:Add sysctl_max_skb_frags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Devices may have limits on the number of fragments in an skb they support.
Current codebase uses a constant as maximum for number of fragments one
skb can hold and use.
When enabling scatter/gather and running traffic with many small messages
the codebase uses the maximum number of fragments and may thereby violate
the max for certain devices.
The patch introduces a global variable as max number of fragments.

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11f935c1a090..4ce9ff7086f4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -299,6 +299,7 @@ struct sk_buff;
 #else
 #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
 #endif
+extern int sysctl_max_skb_frags;
 
 typedef struct skb_frag_struct skb_frag_t;
 
-- 
cgit v1.2.3


From 7e059158d57b79159eaf1f504825d19866ef2c42 Mon Sep 17 00:00:00 2001
From: David Wragg <david@weave.works>
Date: Wed, 10 Feb 2016 00:05:58 +0000
Subject: vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices

Prior to 4.3, openvswitch tunnel vports (vxlan, gre and geneve) could
transmit vxlan packets of any size, constrained only by the ability to
send out the resulting packets.  4.3 introduced netdevs corresponding
to tunnel vports.  These netdevs have an MTU, which limits the size of
a packet that can be successfully encapsulated.  The default MTU
values are low (1500 or less), which is awkwardly small in the context
of physical networks supporting jumbo frames, and leads to a
conspicuous change in behaviour for userspace.

Instead, set the MTU on openvswitch-created netdevs to be the relevant
maximum (i.e. the maximum IP packet size minus any relevant overhead),
effectively restoring the behaviour prior to 4.3.

Signed-off-by: David Wragg <david@weave.works>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 6db96ea0144f..dda9abf6b89c 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 		    u8 *protocol, struct flowi4 *fl4);
+int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
-- 
cgit v1.2.3


From 73500267c930baadadb0d02284909731baf151f7 Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Mon, 8 Feb 2016 14:48:11 -0500
Subject: lib/ucs2_string: Add ucs2 -> utf8 helper functions

This adds ucs2_utf8size(), which tells us how big our ucs2 string is in
bytes, and ucs2_as_utf8, which translates from ucs2 to utf8..

Signed-off-by: Peter Jones <pjones@redhat.com>
Tested-by: Lee, Chun-Yi <jlee@suse.com>
Acked-by: Matthew Garrett <mjg59@coreos.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
---
 include/linux/ucs2_string.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h
index cbb20afdbc01..bb679b48f408 100644
--- a/include/linux/ucs2_string.h
+++ b/include/linux/ucs2_string.h
@@ -11,4 +11,8 @@ unsigned long ucs2_strlen(const ucs2_char_t *s);
 unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength);
 int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len);
 
+unsigned long ucs2_utf8size(const ucs2_char_t *src);
+unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src,
+			   unsigned long maxlength);
+
 #endif /* _LINUX_UCS2_STRING_H_ */
-- 
cgit v1.2.3


From 8282f5d9c17fe15a9e658c06e3f343efae1a2a2f Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Mon, 8 Feb 2016 14:48:14 -0500
Subject: efi: Make our variable validation list include the guid

All the variables in this list so far are defined to be in the global
namespace in the UEFI spec, so this just further ensures we're
validating the variables we think we are.

Including the guid for entries will become more important in future
patches when we decide whether or not to allow deletion of variables
based on presence in this list.

Signed-off-by: Peter Jones <pjones@redhat.com>
Tested-by: Lee, Chun-Yi <jlee@suse.com>
Acked-by: Matthew Garrett <mjg59@coreos.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
---
 include/linux/efi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 569b5a866bb1..16ca611aabc8 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1199,7 +1199,8 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
 struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
 				       struct list_head *head, bool remove);
 
-bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len);
+bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
+		     unsigned long data_size);
 
 extern struct work_struct efivar_work;
 void efivar_run_worker(void);
-- 
cgit v1.2.3


From ed8b0de5a33d2a2557dce7f9429dca8cb5bc5879 Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Mon, 8 Feb 2016 14:48:15 -0500
Subject: efi: Make efivarfs entries immutable by default

"rm -rf" is bricking some peoples' laptops because of variables being
used to store non-reinitializable firmware driver data that's required
to POST the hardware.

These are 100% bugs, and they need to be fixed, but in the mean time it
shouldn't be easy to *accidentally* brick machines.

We have to have delete working, and picking which variables do and don't
work for deletion is quite intractable, so instead make everything
immutable by default (except for a whitelist), and make tools that
aren't quite so broad-spectrum unset the immutable flag.

Signed-off-by: Peter Jones <pjones@redhat.com>
Tested-by: Lee, Chun-Yi <jlee@suse.com>
Acked-by: Matthew Garrett <mjg59@coreos.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
---
 include/linux/efi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 16ca611aabc8..47be3ad7d3e5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1201,6 +1201,8 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
 
 bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data,
 		     unsigned long data_size);
+bool efivar_variable_is_removable(efi_guid_t vendor, const char *name,
+				  size_t len);
 
 extern struct work_struct efivar_work;
 void efivar_run_worker(void);
-- 
cgit v1.2.3


From 9095adaab8c1d82707e4e9961b6ad79b62f3361b Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@qlogic.com>
Date: Wed, 10 Feb 2016 18:59:13 -0500
Subject: target/transport: add flag to indicate CPU Affinity is observed

Signed-off-by: Quinn Tran <quinn.tran@qlogic.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Fixes: fb3269b ("qla2xxx: Add selective command queuing")
Signed-off-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index d71a3ead9e63..e8c8c08bf575 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -141,6 +141,7 @@ enum se_cmd_flags_table {
 	SCF_COMPARE_AND_WRITE_POST	= 0x00100000,
 	SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000,
 	SCF_ACK_KREF			= 0x00400000,
+	SCF_USE_CPUID			= 0x00800000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -188,6 +189,7 @@ enum target_sc_flags_table {
 	TARGET_SCF_BIDI_OP		= 0x01,
 	TARGET_SCF_ACK_KREF		= 0x02,
 	TARGET_SCF_UNKNOWN_SIZE		= 0x04,
+	TARGET_SCF_USE_CPUID	= 0x08,
 };
 
 /* fabric independent task management function values */
-- 
cgit v1.2.3


From 4a389810bc3cb0e73443104f0827e81e23cb1e12 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 11 Feb 2016 16:13:14 -0800
Subject: kernel/locking/lockdep.c: convert hash tables to hlists

Mike said:

: CONFIG_UBSAN_ALIGNMENT breaks x86-64 kernel with lockdep enabled, i.  e
: kernel with CONFIG_UBSAN_ALIGNMENT fails to load without even any error
: message.
:
: The problem is that ubsan callbacks use spinlocks and might be called
: before lockdep is initialized.  Particularly this line in the
: reserve_ebda_region function causes problem:
:
: lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
:
: If i put lockdep_init() before reserve_ebda_region call in
: x86_64_start_reservations kernel loads well.

Fix this ordering issue permanently: change lockdep so that it uses
hlists for the hash tables.  Unlike a list_head, an hlist_head is in its
initialized state when it is all-zeroes, so lockdep is ready for
operation immediately upon boot - lockdep_init() need not have run.

The patch will also save some memory.

lockdep_init() and lockdep_initialized can be done away with now - a 4.6
patch has been prepared to do this.

Reported-by: Mike Krinkin <krinkin.m.u@gmail.com>
Suggested-by: Mike Krinkin <krinkin.m.u@gmail.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index c57e424d914b..4dca42fd32f5 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -66,7 +66,7 @@ struct lock_class {
 	/*
 	 * class-hash:
 	 */
-	struct list_head		hash_entry;
+	struct hlist_node		hash_entry;
 
 	/*
 	 * global list of all lock-classes:
@@ -199,7 +199,7 @@ struct lock_chain {
 	u8				irq_context;
 	u8				depth;
 	u16				base;
-	struct list_head		entry;
+	struct hlist_node		entry;
 	u64				chain_key;
 };
 
-- 
cgit v1.2.3


From db78c22230d0bcc8b27b81f05b39f104f08232c5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 11 Feb 2016 16:13:17 -0800
Subject: mm: fix pfn_t vs highmem

The pfn_t type uses an unsigned long to store a pfn + flags value.  On a
64-bit platform the upper 12 bits of an unsigned long are never used for
storing the value of a pfn.  However, this is not true on highmem
platforms, all 32-bits of a pfn value are used to address a 44-bit
physical address space.  A pfn_t needs to store a 64-bit value.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=112211
Fixes: 01c8f1c44b83 ("mm, dax, gpu: convert vm_insert_mixed to pfn_t")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Stuart Foster <smf.linux@ntlworld.com>
Reported-by: Julian Margetson <runaway@candw.ms>
Tested-by: Julian Margetson <runaway@candw.ms>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pfn.h   |  2 +-
 include/linux/pfn_t.h | 19 +++++++++----------
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index 2d8e49711b63..1132953235c0 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -10,7 +10,7 @@
  * backing is indicated by flags in the high bits of the value.
  */
 typedef struct {
-	unsigned long val;
+	u64 val;
 } pfn_t;
 #endif
 
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 37448ab5fb5c..94994810c7c0 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -9,14 +9,13 @@
  * PFN_DEV - pfn is not covered by system memmap by default
  * PFN_MAP - pfn has a dynamic page mapping established by a device driver
  */
-#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \
-		<< (BITS_PER_LONG - PAGE_SHIFT))
-#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1))
-#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2))
-#define PFN_DEV (1UL << (BITS_PER_LONG - 3))
-#define PFN_MAP (1UL << (BITS_PER_LONG - 4))
-
-static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags)
+#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
+#define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
+#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
+#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
+#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+
+static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
 {
 	pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
 
@@ -29,7 +28,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -87,7 +86,7 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
 #ifdef __HAVE_ARCH_PTE_DEVMAP
 static inline bool pfn_t_devmap(pfn_t pfn)
 {
-	const unsigned long flags = PFN_DEV|PFN_MAP;
+	const u64 flags = PFN_DEV|PFN_MAP;
 
 	return (pfn.val & flags) == flags;
 }
-- 
cgit v1.2.3