From 87712bf81dd092821c406ea3fb47a07222484a64 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Sun, 14 Dec 2014 23:34:51 -0800
Subject: Drivers: hv: vmbus: Use get_cpu() to get the current CPU

Replace calls to smp_processor_id() with get_cpu() to get the CPU ID of
the current CPU. In these instances there is no correctness issue with
regard to preemption; we just need the current CPU ID.

Signed-off-by: K. Y. Srinivasan
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/channel_mgmt.c | 4 +++-
 drivers/hv/connection.c   | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 2c59f030546b..1d7df2576b1c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -815,7 +815,7 @@ cleanup:
 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
 {
        struct list_head *cur, *tmp;
-       int cur_cpu = hv_context.vp_index[smp_processor_id()];
+       int cur_cpu;
        struct vmbus_channel *cur_channel;
        struct vmbus_channel *outgoing_channel = primary;
        int cpu_distance, new_cpu_distance;
@@ -823,6 +823,8 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
        if (list_empty(&primary->sc_list))
                return outgoing_channel;
 
+       cur_cpu = hv_context.vp_index[get_cpu()];
+       put_cpu();
        list_for_each_safe(cur, tmp, &primary->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
                if (cur_channel->state != CHANNEL_OPENED_STATE)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index e206619b946e..a63a795300b9 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -80,8 +80,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
        msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
        msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
        msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
-       if (version == VERSION_WIN8_1)
-               msg->target_vcpu = hv_context.vp_index[smp_processor_id()];
+       if (version == VERSION_WIN8_1) {
+               msg->target_vcpu = hv_context.vp_index[get_cpu()];
+               put_cpu();
+       }
 
        /*
         * Add to list before we send the request since we may
-- cgit v1.2.3
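The pattern being applied is worth spelling out: smp_processor_id() is only
meaningful while preemption is disabled, whereas get_cpu() disables
preemption, returns the current CPU ID, and put_cpu() re-enables it. Because
put_cpu() is called right after the array lookup, the ID may already be stale
when it is used; the commit message argues that is acceptable because only a
placement hint is needed. A rough userspace analogue of that "momentary CPU
ID" idea, using sched_getcpu() (my illustration, not driver code):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        /* Like a raw smp_processor_id(), the result is only a hint:
         * the scheduler may migrate this thread right after the call. */
        int cpu = sched_getcpu();

        if (cpu < 0) {
                perror("sched_getcpu");
                return 1;
        }
        printf("currently on CPU %d (may change at any time)\n", cpu);
        return 0;
}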
From 79208c57da5311860f165b613c89b3f647e357cd Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Fri, 9 Jan 2015 23:54:29 -0800
Subject: Drivers: hv: hv_balloon: Make adjustments in computing the floor

Make adjustments in computing the balloon floor. The current computation of
the balloon floor was not appropriate for virtual machines with more than
10 GB of assigned memory - we would get into situations where the host would
aggressively balloon down the guest and leave the guest in an unusable state.
This patch fixes the issue by raising the floor.

Signed-off-by: K. Y. Srinivasan
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/hv_balloon.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index b958ded8ac7e..9cbbb831778a 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -928,9 +928,8 @@ static unsigned long compute_balloon_floor(void)
         *     128        72    (1/2)
         *     512       168    (1/4)
         *    2048       360    (1/8)
-        *    8192       552    (1/32)
-        *   32768      1320
-        *  131072      4392
+        *    8192       768    (1/16)
+        *   32768      1536    (1/32)
         */
        if (totalram_pages < MB2PAGES(128))
                min_pages = MB2PAGES(8) + (totalram_pages >> 1);
@@ -938,8 +937,10 @@ static unsigned long compute_balloon_floor(void)
                min_pages = MB2PAGES(40) + (totalram_pages >> 2);
        else if (totalram_pages < MB2PAGES(2048))
                min_pages = MB2PAGES(104) + (totalram_pages >> 3);
+       else if (totalram_pages < MB2PAGES(8192))
+               min_pages = MB2PAGES(256) + (totalram_pages >> 4);
        else
-               min_pages = MB2PAGES(296) + (totalram_pages >> 5);
+               min_pages = MB2PAGES(512) + (totalram_pages >> 5);
 #undef MB2PAGES
        return min_pages;
 }
-- cgit v1.2.3
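The new floor function is easy to check outside the kernel. The standalone
sketch below mirrors the patch's arithmetic (assuming 4 KiB pages, so
MB2PAGES(mb) = mb * 256). Note that the comment table lists each boundary as
computed by the branch below it: an exact 2048 MB guest falls into the new
1/16 branch and gets a 384 MB floor, while the table's "2048 -> 360" is the
limit approaching 2048 MB from below.

#include <stdio.h>

#define MB2PAGES(mb) ((mb) * 256UL)   /* 4 KiB pages, as in the driver */

static unsigned long compute_balloon_floor(unsigned long totalram_pages)
{
        unsigned long min_pages;

        if (totalram_pages < MB2PAGES(128))
                min_pages = MB2PAGES(8) + (totalram_pages >> 1);
        else if (totalram_pages < MB2PAGES(512))
                min_pages = MB2PAGES(40) + (totalram_pages >> 2);
        else if (totalram_pages < MB2PAGES(2048))
                min_pages = MB2PAGES(104) + (totalram_pages >> 3);
        else if (totalram_pages < MB2PAGES(8192))
                min_pages = MB2PAGES(256) + (totalram_pages >> 4);
        else
                min_pages = MB2PAGES(512) + (totalram_pages >> 5);
        return min_pages;
}

int main(void)
{
        unsigned long mb[] = { 128, 512, 2048, 8192, 32768 };

        for (unsigned i = 0; i < sizeof(mb) / sizeof(mb[0]); i++)
                printf("%6lu MB RAM -> floor %5lu MB\n", mb[i],
                       compute_balloon_floor(MB2PAGES(mb[i])) / 256);
        return 0;
}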
From 22f88475b62ac826acae2f77c3e1bd9543e87b2a Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Fri, 9 Jan 2015 23:54:30 -0800
Subject: Drivers: hv: hv_balloon: Fix a locking bug in the balloon driver

We support memory hot-add in the Hyper-V balloon driver by hot adding an
appropriately sized and aligned region and controlling the on-lining of pages
within that region based on the pages that the host wants us to online. We do
this because the granularity and alignment requirements in Linux are
different from what Windows expects. The state to manage the onlining of
pages needs to be correctly protected. Fix this bug.

Signed-off-by: K. Y. Srinivasan
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/hv_balloon.c | 68 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 5 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 9cbbb831778a..8e30415b0eb7 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -533,6 +533,9 @@ struct hv_dynmem_device {
         */
        struct task_struct *thread;
 
+       struct mutex ha_region_mutex;
+       struct completion waiter_event;
+
        /*
         * A list of hot-add regions.
         */
@@ -549,7 +552,59 @@ struct hv_dynmem_device {
 static struct hv_dynmem_device dm_device;
 
 static void post_status(struct hv_dynmem_device *dm);
+
 #ifdef CONFIG_MEMORY_HOTPLUG
+static void acquire_region_mutex(bool trylock)
+{
+       if (trylock) {
+               reinit_completion(&dm_device.waiter_event);
+               while (!mutex_trylock(&dm_device.ha_region_mutex))
+                       wait_for_completion(&dm_device.waiter_event);
+       } else {
+               mutex_lock(&dm_device.ha_region_mutex);
+       }
+}
+
+static void release_region_mutex(bool trylock)
+{
+       if (trylock) {
+               mutex_unlock(&dm_device.ha_region_mutex);
+       } else {
+               mutex_unlock(&dm_device.ha_region_mutex);
+               complete(&dm_device.waiter_event);
+       }
+}
+
+static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
+                             void *v)
+{
+       switch (val) {
+       case MEM_GOING_ONLINE:
+               acquire_region_mutex(true);
+               break;
+
+       case MEM_ONLINE:
+       case MEM_CANCEL_ONLINE:
+               release_region_mutex(true);
+               if (dm_device.ha_waiting) {
+                       dm_device.ha_waiting = false;
+                       complete(&dm_device.ol_waitevent);
+               }
+               break;
+
+       case MEM_GOING_OFFLINE:
+       case MEM_OFFLINE:
+       case MEM_CANCEL_OFFLINE:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block hv_memory_nb = {
+       .notifier_call = hv_memory_notifier,
+       .priority = 0
+};
+
 static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
 {
@@ -591,6 +646,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                init_completion(&dm_device.ol_waitevent);
                dm_device.ha_waiting = true;
 
+               release_region_mutex(false);
                nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
                ret = add_memory(nid, PFN_PHYS((start_pfn)),
                                (HA_CHUNK << PAGE_SHIFT));
@@ -619,6 +675,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                 * have not been "onlined" within the allowed time.
                 */
                wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+               acquire_region_mutex(false);
 
                post_status(&dm_device);
        }
@@ -632,11 +689,6 @@ static void hv_online_page(struct page *pg)
        unsigned long cur_start_pgp;
        unsigned long cur_end_pgp;
 
-       if (dm_device.ha_waiting) {
-               dm_device.ha_waiting = false;
-               complete(&dm_device.ol_waitevent);
-       }
-
        list_for_each(cur, &dm_device.ha_region_list) {
                has = list_entry(cur, struct hv_hotadd_state, list);
                cur_start_pgp = (unsigned long)
@@ -834,6 +886,7 @@ static void hot_add_req(struct work_struct *dummy)
        resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+       acquire_region_mutex(false);
        pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
        pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
@@ -865,6 +918,7 @@ static void hot_add_req(struct work_struct *dummy)
        if (do_hot_add)
                resp.page_count = process_hot_add(pg_start, pfn_cnt,
                                                rg_start, rg_sz);
+       release_region_mutex(false);
 #endif
        /*
         * The result field of the response structure has the
@@ -1388,7 +1442,9 @@ static int balloon_probe(struct hv_device *dev,
        dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
        init_completion(&dm_device.host_event);
        init_completion(&dm_device.config_event);
+       init_completion(&dm_device.waiter_event);
        INIT_LIST_HEAD(&dm_device.ha_region_list);
+       mutex_init(&dm_device.ha_region_mutex);
        INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
        INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
        dm_device.host_specified_ha_region = false;
@@ -1402,6 +1458,7 @@ static int balloon_probe(struct hv_device *dev,
 
 #ifdef CONFIG_MEMORY_HOTPLUG
        set_online_page_callback(&hv_online_page);
+       register_memory_notifier(&hv_memory_nb);
 #endif
 
        hv_set_drvdata(dev, &dm_device);
@@ -1520,6 +1577,7 @@ static int balloon_remove(struct hv_device *dev)
        kfree(send_buffer);
 #ifdef CONFIG_MEMORY_HOTPLUG
        restore_online_page_callback(&hv_online_page);
+       unregister_memory_notifier(&hv_memory_nb);
 #endif
        list_for_each_safe(cur, tmp, &dm->ha_region_list) {
                has = list_entry(cur, struct hv_hotadd_state, list);
-- cgit v1.2.3
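The interesting idiom in this patch is acquire_region_mutex(true): the memory
notifier cannot simply mutex_lock(), because the hot-add path holds the
region mutex across add_memory() and signals release through a completion, so
the notifier spins on mutex_trylock() and parks on the completion between
attempts. A minimal pthread sketch of that trylock-or-wait shape (all names
mine, a userspace analogy rather than the driver's code; a counter stands in
for the completion so a wakeup cannot be lost):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t region_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ev_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t ev_cond = PTHREAD_COND_INITIALIZER;
static int release_count;                  /* stands in for the completion */

static void acquire_region_mutex_trylock(void)
{
        pthread_mutex_lock(&ev_lock);
        while (pthread_mutex_trylock(&region_mutex) != 0) {
                int seen = release_count;
                while (release_count == seen)   /* wait_for_completion() */
                        pthread_cond_wait(&ev_cond, &ev_lock);
        }
        pthread_mutex_unlock(&ev_lock);
}

static void release_region_mutex_signal(void)
{
        pthread_mutex_unlock(&region_mutex);
        pthread_mutex_lock(&ev_lock);
        release_count++;                        /* complete() */
        pthread_cond_broadcast(&ev_cond);
        pthread_mutex_unlock(&ev_lock);
}

static void *hot_add_path(void *arg)
{
        pthread_mutex_lock(&region_mutex);  /* acquire_region_mutex(false) */
        sleep(1);                           /* add_memory() in flight */
        release_region_mutex_signal();
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, hot_add_path, NULL);
        usleep(100 * 1000);
        acquire_region_mutex_trylock();     /* the notifier side */
        printf("notifier got the region mutex after hot-add released it\n");
        pthread_mutex_unlock(&region_mutex);
        pthread_join(t, NULL);
        return 0;
}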
From ab3de22bb4a3d4bda2d0ec8bebcb76a40f1cbf9b Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Fri, 9 Jan 2015 23:54:31 -0800
Subject: Drivers: hv: hv_balloon: Don't post pressure status from interrupt
 context

We currently release memory (balloon down) in the interrupt context and we
also post memory status while releasing memory. Rather than posting the
status in the interrupt context, wake up the status posting thread to post
the status. This will address the inconsistent lock state that Sitsofe
Wheeler reported:

http://lkml.iu.edu/hypermail/linux/kernel/1411.1/00075.html

Signed-off-by: K. Y. Srinivasan
Reported-by: Sitsofe Wheeler
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/hv_balloon.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 8e30415b0eb7..ff169386b2c7 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1226,7 +1226,7 @@ static void balloon_down(struct hv_dynmem_device *dm,
 
        for (i = 0; i < range_count; i++) {
                free_balloon_pages(dm, &range_array[i]);
-               post_status(&dm_device);
+               complete(&dm_device.config_event);
        }
 
        if (req->more_pages == 1)
@@ -1250,19 +1250,16 @@ static void balloon_onchannelcallback(void *context);
 static int dm_thread_func(void *dm_dev)
 {
        struct hv_dynmem_device *dm = dm_dev;
-       int t;
 
        while (!kthread_should_stop()) {
-               t = wait_for_completion_interruptible_timeout(
+               wait_for_completion_interruptible_timeout(
                                                &dm_device.config_event, 1*HZ);
                /*
                 * The host expects us to post information on the memory
                 * pressure every second.
                 */
-
-               if (t == 0)
-                       post_status(dm);
-
+               reinit_completion(&dm_device.config_event);
+               post_status(dm);
        }
 
        return 0;
-- cgit v1.2.3
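After this change the thread posts status either when a second elapses or as
soon as the "interrupt" path signals the completion, instead of the signaling
path doing the work itself. A userspace sketch of the same defer-to-thread
shape using pthread_cond_timedwait() (my analogy, not driver code; the flag
plays the role of the completion and its reinit):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t config_event = PTHREAD_COND_INITIALIZER;
static int kicked;

/* Stand-in for post_status(): must not run in "interrupt" context. */
static void post_status(void) { printf("status posted\n"); }

static void *dm_thread(void *arg)
{
        for (int i = 0; i < 3; i++) {
                struct timespec deadline;

                clock_gettime(CLOCK_REALTIME, &deadline);
                deadline.tv_sec += 1;            /* 1*HZ in the driver */
                pthread_mutex_lock(&lk);
                while (!kicked &&
                       pthread_cond_timedwait(&config_event, &lk,
                                              &deadline) == 0)
                        ;                        /* spurious wakeup: re-wait */
                kicked = 0;                      /* reinit_completion() */
                pthread_mutex_unlock(&lk);
                post_status();                   /* every second OR on kick */
        }
        return NULL;
}

/* Called from the "interrupt" path instead of posting directly. */
static void kick_status_thread(void)
{
        pthread_mutex_lock(&lk);
        kicked = 1;                              /* complete() */
        pthread_cond_signal(&config_event);
        pthread_mutex_unlock(&lk);
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, dm_thread, NULL);
        kick_status_thread();                    /* balloon_down() analogue */
        pthread_join(t, NULL);
        return 0;
}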
From 4061ed9e2aaac31daef44f06e9b83143c78b24b2 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Fri, 9 Jan 2015 23:54:32 -0800
Subject: Drivers: hv: vmbus: Implement a clockevent device

Implement a clockevent device based on the timer support available on
Hyper-V. In this version of the patch I have addressed Jason's review
comments.

Signed-off-by: K. Y. Srinivasan
Reviewed-by: Jason Wang
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/hv.c           | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/hv/hyperv_vmbus.h | 21 +++++++++++++
 drivers/hv/vmbus_drv.c    | 37 ++++++++++++++++++++--
 3 files changed, 134 insertions(+), 2 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 3e4235c7a47f..50e51a51ff8b 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -28,7 +28,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include "hyperv_vmbus.h"
 
 /* The one and only */
@@ -37,6 +39,10 @@ struct hv_context hv_context = {
        .hypercall_page = NULL,
 };
+
+#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
+#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
+#define HV_MIN_DELTA_TICKS 1
 
 /*
  * query_hypervisor_info - Get version info of the windows hypervisor
@@ -144,6 +150,8 @@ int hv_init(void)
               sizeof(int) * NR_CPUS);
        memset(hv_context.event_dpc, 0,
               sizeof(void *) * NR_CPUS);
+       memset(hv_context.clk_evt, 0,
+              sizeof(void *) * NR_CPUS);
 
        max_leaf = query_hypervisor_info();
@@ -258,10 +266,63 @@ u16 hv_signal_event(void *con_id)
        return status;
 }
 
+static int hv_ce_set_next_event(unsigned long delta,
+                               struct clock_event_device *evt)
+{
+       cycle_t current_tick;
+
+       WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
+
+       rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+       current_tick += delta;
+       wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
+       return 0;
+}
+
+static void hv_ce_setmode(enum clock_event_mode mode,
+                         struct clock_event_device *evt)
+{
+       union hv_timer_config timer_cfg;
+
+       switch (mode) {
+       case CLOCK_EVT_MODE_PERIODIC:
+               /* unsupported */
+               break;
+
+       case CLOCK_EVT_MODE_ONESHOT:
+               timer_cfg.enable = 1;
+               timer_cfg.auto_enable = 1;
+               timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+               wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
+               break;
+
+       case CLOCK_EVT_MODE_UNUSED:
+       case CLOCK_EVT_MODE_SHUTDOWN:
+               wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
+               wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);
+               break;
+       case CLOCK_EVT_MODE_RESUME:
+               break;
+       }
+}
+
+static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
+{
+       dev->name = "Hyper-V clockevent";
+       dev->features = CLOCK_EVT_FEAT_ONESHOT;
+       dev->cpumask = cpumask_of(cpu);
+       dev->rating = 1000;
+       dev->owner = THIS_MODULE;
+
+       dev->set_mode = hv_ce_setmode;
+       dev->set_next_event = hv_ce_set_next_event;
+}
+
 int hv_synic_alloc(void)
 {
        size_t size = sizeof(struct tasklet_struct);
+       size_t ced_size = sizeof(struct clock_event_device);
        int cpu;
 
        for_each_online_cpu(cpu) {
@@ -272,6 +333,13 @@ int hv_synic_alloc(void)
                }
                tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
 
+               hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
+               if (hv_context.clk_evt[cpu] == NULL) {
+                       pr_err("Unable to allocate clock event device\n");
+                       goto err;
+               }
+               hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);
+
                hv_context.synic_message_page[cpu] =
                        (void *)get_zeroed_page(GFP_ATOMIC);
@@ -305,6 +373,7 @@ err:
 static void hv_synic_free_cpu(int cpu)
 {
        kfree(hv_context.event_dpc[cpu]);
+       kfree(hv_context.clk_evt[cpu]);
        if (hv_context.synic_event_page[cpu])
                free_page((unsigned long)hv_context.synic_event_page[cpu]);
        if (hv_context.synic_message_page[cpu])
@@ -388,6 +457,15 @@ void hv_synic_init(void *arg)
        hv_context.vp_index[cpu] = (u32)vp_index;
 
        INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
+
+       /*
+        * Register the per-cpu clockevent source.
+        */
+       if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
+               clockevents_config_and_register(hv_context.clk_evt[cpu],
+                                               HV_TIMER_FREQUENCY,
+                                               HV_MIN_DELTA_TICKS,
+                                               HV_MAX_MAX_DELTA_TICKS);
        return;
 }
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index c386d8dc7223..44b1c9424712 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -178,6 +178,23 @@ struct hv_message_header {
        };
 };
 
+/*
+ * Timer configuration register.
+ */
+union hv_timer_config {
+       u64 as_uint64;
+       struct {
+               u64 enable:1;
+               u64 periodic:1;
+               u64 lazy:1;
+               u64 auto_enable:1;
+               u64 reserved_z0:12;
+               u64 sintx:4;
+               u64 reserved_z1:44;
+       };
+};
+
+
 /* Define timer message payload structure. */
 struct hv_timer_message_payload {
        u32 timer_index;
@@ -519,6 +536,10 @@ struct hv_context {
         * buffer to post messages to the host.
         */
        void *post_msg_page[NR_CPUS];
+       /*
+        * Support PV clockevent device.
+        */
+       struct clock_event_device *clk_evt[NR_CPUS];
 };
 
 extern struct hv_context hv_context;
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4d6b26979fbd..7488111ec057 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -578,6 +579,34 @@ static void vmbus_onmessage_work(struct work_struct *work)
        kfree(ctx);
 }
 
+void hv_process_timer_expiration(struct hv_message *msg, int cpu)
+{
+       struct clock_event_device *dev = hv_context.clk_evt[cpu];
+
+       if (dev->event_handler)
+               dev->event_handler(dev);
+
+       msg->header.message_type = HVMSG_NONE;
+
+       /*
+        * Make sure the write to MessageType (ie set to
+        * HVMSG_NONE) happens before we read the
+        * MessagePending and EOMing. Otherwise, the EOMing
+        * will not deliver any more messages since there is
+        * no empty slot
+        */
+       mb();
+
+       if (msg->header.message_flags.msg_pending) {
+               /*
+                * This will cause message queue rescan to
+                * possibly deliver another msg from the
+                * hypervisor
+                */
+               wrmsrl(HV_X64_MSR_EOM, 0);
+       }
+}
+
 static void vmbus_on_msg_dpc(unsigned long data)
 {
        int cpu = smp_processor_id();
@@ -667,8 +696,12 @@ static void vmbus_isr(void)
        msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 
        /* Check if there are actual msgs to be processed */
-       if (msg->header.message_type != HVMSG_NONE)
-               tasklet_schedule(&msg_dpc);
+       if (msg->header.message_type != HVMSG_NONE) {
+               if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
+                       hv_process_timer_expiration(msg, cpu);
+               else
+                       tasklet_schedule(&msg_dpc);
+       }
 }
 
 /*
-- cgit v1.2.3
From 9f52a1630922bcdab75fc72e59ed58db8e164314 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Fri, 9 Jan 2015 23:54:33 -0800
Subject: Drivers: hv: vmbus: Fix a bug in vmbus_establish_gpadl()

Correctly compute the local (gpadl) handle. I would like to thank Michael
Brown for seeing this bug.

Signed-off-by: K. Y. Srinivasan
Reported-by: Michael Brown
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/channel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 433f72a1c006..c76ffbe59f65 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -366,8 +366,8 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
        unsigned long flags;
        int ret = 0;
 
-       next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle);
-       atomic_inc(&vmbus_connection.next_gpadl_handle);
+       next_gpadl_handle =
+               (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
 
        ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
        if (ret)
-- cgit v1.2.3
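The bug was that atomic_read() followed by atomic_inc() is two operations:
two CPUs could both read the same next_gpadl_handle before either increments
it, handing out duplicate handles. atomic_inc_return(&x) - 1 performs the
fetch and the add as one atomic step. A C11 sketch of the fixed pattern (my
demo, not driver code):

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

#define PER_THREAD 100000

static atomic_uint next_handle;
static unsigned int handles[2][PER_THREAD];

static void *taker(void *arg)
{
        unsigned int *out = arg;

        for (int i = 0; i < PER_THREAD; i++)
                /* atomic fetch-then-add, like atomic_inc_return(&x) - 1 */
                out[i] = atomic_fetch_add(&next_handle, 1);
        return NULL;
}

int main(void)
{
        pthread_t a, b;
        unsigned long long sum = 0, n = 2ULL * PER_THREAD;

        pthread_create(&a, NULL, taker, handles[0]);
        pthread_create(&b, NULL, taker, handles[1]);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        for (int t = 0; t < 2; t++)
                for (int i = 0; i < PER_THREAD; i++)
                        sum += handles[t][i];
        /* sanity check: unique handles 0..n-1 sum to n*(n-1)/2 */
        printf("%s\n", sum == n * (n - 1) / 2 ? "all handles accounted for"
                                              : "lost or duplicated handles!");
        return 0;
}

With the old read-then-increment version, the equivalent racy code can emit
the same handle twice under contention; the atomic fetch-add cannot.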
From d61031ee8df6214d58371a1cc36a0591e242fba0 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan"
Date: Fri, 9 Jan 2015 23:54:34 -0800
Subject: Drivers: hv: vmbus: Support a vmbus API for efficiently sending page
 arrays

Currently, the API for sending a multi-page buffer over VMBUS is limited to a
maximum pfn array of MAX_MULTIPAGE_BUFFER_COUNT. This limitation is not
imposed by the host and unnecessarily limits the maximum payload that can be
sent. Implement an API that does not have this restriction.

Signed-off-by: K. Y. Srinivasan
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/channel.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'drivers/hv')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index c76ffbe59f65..18c4f23dacf1 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -683,6 +683,50 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
 }
 EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
 
+/*
+ * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
+ * using a GPADL Direct packet type.
+ * The buffer includes the vmbus descriptor.
+ */
+int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
+                             struct vmbus_packet_mpb_array *desc,
+                             u32 desc_size,
+                             void *buffer, u32 bufferlen, u64 requestid)
+{
+       int ret;
+       u32 packetlen;
+       u32 packetlen_aligned;
+       struct kvec bufferlist[3];
+       u64 aligned_data = 0;
+       bool signal = false;
+
+       packetlen = desc_size + bufferlen;
+       packetlen_aligned = ALIGN(packetlen, sizeof(u64));
+
+       /* Setup the descriptor */
+       desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
+       desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+       desc->dataoffset8 = desc_size >> 3; /* in 8-byte granularity */
+       desc->length8 = (u16)(packetlen_aligned >> 3);
+       desc->transactionid = requestid;
+       desc->rangecount = 1;
+
+       bufferlist[0].iov_base = desc;
+       bufferlist[0].iov_len = desc_size;
+       bufferlist[1].iov_base = buffer;
+       bufferlist[1].iov_len = bufferlen;
+       bufferlist[2].iov_base = &aligned_data;
+       bufferlist[2].iov_len = (packetlen_aligned - packetlen);
+
+       ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+
+       if (ret == 0 && signal)
+               vmbus_setevent(channel);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
+
 /*
  * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
  * using a GPADL Direct packet type.
-- cgit v1.2.3
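The length math is the core of the send path: the packet is the
caller-supplied descriptor plus payload, padded to an 8-byte boundary with a
zero qword, and the three-element kvec is the gather list handed to the
ring-buffer write. A userspace sketch of the same arithmetic using struct
iovec in place of struct kvec (illustrative sizes, my names):

#include <stdio.h>
#include <stdint.h>
#include <sys/uio.h>

#define ALIGN8(len) (((len) + 7U) & ~7U)

int main(void)
{
        uint32_t desc_size = 40;        /* e.g. descriptor plus a pfn array */
        uint32_t bufferlen = 100;       /* payload following the descriptor */
        uint64_t aligned_data = 0;      /* zero padding, as in the driver */
        uint32_t packetlen = desc_size + bufferlen;
        uint32_t packetlen_aligned = ALIGN8(packetlen);
        struct iovec bufferlist[3] = {
                { .iov_base = NULL, .iov_len = desc_size },   /* descriptor */
                { .iov_base = NULL, .iov_len = bufferlen },   /* payload    */
                { .iov_base = &aligned_data,
                  .iov_len = packetlen_aligned - packetlen }  /* pad qword  */
        };

        printf("desc %u + payload %u = %u, padded to %u (pad %zu)\n",
               desc_size, bufferlen, packetlen, packetlen_aligned,
               bufferlist[2].iov_len);
        /* the ring descriptor fields are expressed in 8-byte units: */
        printf("dataoffset8 = %u, length8 = %u\n",
               desc_size >> 3, packetlen_aligned >> 3);
        return 0;
}

For the sample sizes this prints a 140-byte packet padded to 144, with
dataoffset8 = 5 and length8 = 18.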
From d9b1652947c695d247b5e4603a16213ec55661ed Mon Sep 17 00:00:00 2001
From: Dexuan Cui
Date: Wed, 14 Jan 2015 01:55:10 -0800
Subject: hv: hv_fcopy: drop the obsolete message on transfer failure

If the user-space daemon crashes, hangs or is killed, we need to down the
semaphore; otherwise, after the daemon next starts, the obsolete data in
fcopy_transaction.message or fcopy_transaction.fcopy_msg will be used
immediately.

Cc: Jason Wang
Cc: Vitaly Kuznetsov
Signed-off-by: Dexuan Cui
Signed-off-by: K. Y. Srinivasan
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/hv_fcopy.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'drivers/hv')

diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23b2ce294c4c..cd453e4b2a07 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -86,6 +86,18 @@ static void fcopy_work_func(struct work_struct *dummy)
         * process the pending transaction.
         */
        fcopy_respond_to_host(HV_E_FAIL);
+
+       /* In the case the user-space daemon crashes, hangs or is killed, we
+        * need to down the semaphore, otherwise, after the daemon starts next
+        * time, the obsolete data in fcopy_transaction.message or
+        * fcopy_transaction.fcopy_msg will be used immediately.
+        *
+        * NOTE: fcopy_read() happens to get the semaphore (very rare)? We're
+        * still OK, because we've reported the failure to the host.
+        */
+       if (down_trylock(&fcopy_transaction.read_sema))
+               ;
+
 }
 
 static int fcopy_handle_handshake(u32 version)
@@ -344,6 +356,14 @@ static int fcopy_open(struct inode *inode, struct file *f)
        return 0;
 }
 
+/* XXX: there are still some tricky corner cases, e.g.,
+ * 1) In a SMP guest, when fcopy_release() runs between
+ * schedule_delayed_work() and fcopy_send_data(), there is
+ * still a chance an obsolete message will be queued.
+ *
+ * 2) When the fcopy daemon is running, if we unload the driver,
+ * we'll notice a kernel oops when we kill the daemon later.
+ */
 static int fcopy_release(struct inode *inode, struct file *f)
 {
        /*
@@ -351,6 +371,13 @@ static int fcopy_release(struct inode *inode, struct file *f)
         */
        in_hand_shake = true;
        opened = false;
+
+       if (cancel_delayed_work_sync(&fcopy_work)) {
+               /* We haven't up()-ed the semaphore(very rare)? */
+               if (down_trylock(&fcopy_transaction.read_sema))
+                       ;
+               fcopy_respond_to_host(HV_E_FAIL);
+       }
        return 0;
 }
-- cgit v1.2.3
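The odd-looking "if (down_trylock(...)) ;" with an empty body is deliberate:
it consumes a stale "up" on the semaphore if one is pending, without ever
blocking, and ignores the result either way. POSIX analogue of draining a
possibly-stale count (my demo; link with -pthread on most systems):

#include <semaphore.h>
#include <stdio.h>

int main(void)
{
        sem_t read_sema;
        int val;

        sem_init(&read_sema, 0, 0);
        sem_post(&read_sema);        /* stale wakeup left by a dead daemon */

        /* Equivalent of "if (down_trylock(&...read_sema)) ;" -- take the
         * count if present, never block: */
        if (sem_trywait(&read_sema) == 0)
                puts("drained a stale wakeup");
        else
                puts("semaphore was already clean");

        sem_getvalue(&read_sema, &val);
        printf("count is now %d\n", val);
        sem_destroy(&read_sema);
        return 0;
}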
From 9c3a6f7e476fc4961297fc66b1177f9f8c8dd238 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Tue, 20 Jan 2015 16:45:04 +0100
Subject: Drivers: hv: check vmbus_device_create() return value in
 vmbus_process_offer()

vmbus_device_create() result is not being checked in vmbus_process_offer()
and it can fail if kzalloc() fails. Add the check and do minor cleanup to
avoid additional duplication of the "free_channel(); return;" block.

Reported-by: Jason Wang
Signed-off-by: Vitaly Kuznetsov
Signed-off-by: K. Y. Srinivasan
Acked-by: Jason Wang
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/channel_mgmt.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 1d7df2576b1c..704c0e00f8d2 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -341,11 +341,10 @@ static void vmbus_process_offer(struct work_struct *work)
                        if (channel->sc_creation_callback != NULL)
                                channel->sc_creation_callback(newchannel);
 
-                       return;
+                       goto out;
                }
 
-               free_channel(newchannel);
-               return;
+               goto err_free_chan;
        }
 
        /*
@@ -364,6 +363,8 @@ static void vmbus_process_offer(struct work_struct *work)
                &newchannel->offermsg.offer.if_type,
                &newchannel->offermsg.offer.if_instance,
                newchannel);
+       if (!newchannel->device_obj)
+               goto err_free_chan;
 
        /*
         * Add the new device to the bus. This will kick off device-driver
@@ -379,9 +380,12 @@ static void vmbus_process_offer(struct work_struct *work)
                list_del(&newchannel->listentry);
                spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
                kfree(newchannel->device_obj);
-
-               free_channel(newchannel);
+               goto err_free_chan;
        }
+out:
+       return;
+err_free_chan:
+       free_channel(newchannel);
 }
 
 enum {
-- cgit v1.2.3
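The cleanup applies the kernel's standard goto-based single-exit error
handling: every failure path jumps to one label that releases what was
acquired, so the release code exists in exactly one place. A self-contained
illustration of the idiom with hypothetical resources (not the driver's
code):

#include <stdio.h>
#include <stdlib.h>

static int process_offer_demo(void)
{
        char *channel, *device;

        channel = malloc(64);
        if (!channel)
                goto err;
        device = malloc(64);
        if (!device)                 /* the newly checked failure */
                goto err_free_chan;

        printf("both allocations succeeded\n");
        free(device);
        free(channel);
        return 0;

err_free_chan:
        free(channel);               /* the one place the channel is freed */
err:
        return -1;
}

int main(void)
{
        return process_offer_demo() ? 1 : 0;
}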
From 67fae053bfc6e84144150e4c6c62670abb215c33 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Tue, 20 Jan 2015 16:45:05 +0100
Subject: Drivers: hv: rename sc_lock to the more generic lock

The sc_lock spinlock in struct vmbus_channel is used to protect more than
just the sc_list field; e.g., the vmbus_open() function uses it to implement
test-and-set access to the state field. Rename it to the more generic 'lock'
and add a description.

Signed-off-by: Vitaly Kuznetsov
Signed-off-by: K. Y. Srinivasan
Acked-by: Jason Wang
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/channel.c      |  6 +++---
 drivers/hv/channel_mgmt.c | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 18c4f23dacf1..2978f5ee8d2a 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -73,14 +73,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
        unsigned long flags;
        int ret, t, err = 0;
 
-       spin_lock_irqsave(&newchannel->sc_lock, flags);
+       spin_lock_irqsave(&newchannel->lock, flags);
        if (newchannel->state == CHANNEL_OPEN_STATE) {
                newchannel->state = CHANNEL_OPENING_STATE;
        } else {
-               spin_unlock_irqrestore(&newchannel->sc_lock, flags);
+               spin_unlock_irqrestore(&newchannel->lock, flags);
                return -EINVAL;
        }
-       spin_unlock_irqrestore(&newchannel->sc_lock, flags);
+       spin_unlock_irqrestore(&newchannel->lock, flags);
 
        newchannel->onchannel_callback = onchannelcallback;
        newchannel->channel_callback_context = context;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 704c0e00f8d2..1e0b996ed643 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -146,7 +146,7 @@ static struct vmbus_channel *alloc_channel(void)
                return NULL;
 
        spin_lock_init(&channel->inbound_lock);
-       spin_lock_init(&channel->sc_lock);
+       spin_lock_init(&channel->lock);
 
        INIT_LIST_HEAD(&channel->sc_list);
        INIT_LIST_HEAD(&channel->percpu_list);
@@ -246,9 +246,9 @@ static void vmbus_process_rescind_offer(struct work_struct *work)
                spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
        } else {
                primary_channel = channel->primary_channel;
-               spin_lock_irqsave(&primary_channel->sc_lock, flags);
+               spin_lock_irqsave(&primary_channel->lock, flags);
                list_del(&channel->sc_list);
-               spin_unlock_irqrestore(&primary_channel->sc_lock, flags);
+               spin_unlock_irqrestore(&primary_channel->lock, flags);
        }
        free_channel(channel);
 }
@@ -323,9 +323,9 @@ static void vmbus_process_offer(struct work_struct *work)
         * Process the sub-channel.
         */
        newchannel->primary_channel = channel;
-       spin_lock_irqsave(&channel->sc_lock, flags);
+       spin_lock_irqsave(&channel->lock, flags);
        list_add_tail(&newchannel->sc_list, &channel->sc_list);
-       spin_unlock_irqrestore(&channel->sc_lock, flags);
+       spin_unlock_irqrestore(&channel->lock, flags);
 
        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
-- cgit v1.2.3
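The test-and-set use in vmbus_open() is the part that makes the lock more
than a list guard: the state check and the state transition must happen as
one step under the lock, or two concurrent openers could both observe
CHANNEL_OPEN_STATE and proceed. A compact userspace sketch of that guarded
transition (illustrative, pthread mutex standing in for the spinlock):

#include <pthread.h>
#include <stdio.h>

enum channel_state { CHANNEL_OPEN_STATE, CHANNEL_OPENING_STATE };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;  /* was "sc_lock" */
static enum channel_state state = CHANNEL_OPEN_STATE;

static int vmbus_open_demo(void)
{
        pthread_mutex_lock(&lock);
        if (state != CHANNEL_OPEN_STATE) {
                pthread_mutex_unlock(&lock);
                return -1;                     /* -EINVAL in the driver */
        }
        state = CHANNEL_OPENING_STATE;         /* test and set, atomically */
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        printf("first open:  %d\n", vmbus_open_demo());   /* 0  */
        printf("second open: %d\n", vmbus_open_demo());   /* -1 */
        return 0;
}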
From d7f2fbafb4f84306436277664cf28042beaf252a Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov
Date: Tue, 20 Jan 2015 16:45:06 +0100
Subject: Drivers: hv: vmbus: serialize Offer and Rescind offer

Commit 4b2f9abea52a ("staging: hv: convert channel_mgmt.c to not call
osd_schedule_callback") was written under an assumption that we never receive
a Rescind offer while we're still processing the initial Offer request.
However, the issue we fixed in 04a258c162a8 could be caused by this
assumption not always being true.

In particular, we need to protect against the following:

1) Receiving a Rescind offer after we do queue_work() for processing an
Offer request and before we actually enter vmbus_process_offer(). work.func
points to vmbus_process_offer() at this moment and in vmbus_onoffer_rescind()
we do another queue_work() without a check, so we'll enter
vmbus_process_offer() twice.

2) Receiving a Rescind offer after we enter vmbus_process_offer() and
especially after we set ->state = CHANNEL_OPEN_STATE. Many things can go
wrong in that case, e.g. we can call free_channel() while we're still using
it.

Implement the required protection by changing work->func at the very end of
vmbus_process_offer() and checking work->func in vmbus_onoffer_rescind(). In
case we receive a rescind offer during or before vmbus_process_offer() is
done, we set the rescind flag to true and check it at the end of
vmbus_process_offer(), so such an offer will not get lost.

Suggested-by: Radim Krčmář
Signed-off-by: Vitaly Kuznetsov
Acked-by: Jason Wang
Signed-off-by: K. Y. Srinivasan
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/channel_mgmt.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 1e0b996ed643..3736f71bdec5 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -279,9 +279,6 @@ static void vmbus_process_offer(struct work_struct *work)
        int ret;
        unsigned long flags;
 
-       /* The next possible work is rescind handling */
-       INIT_WORK(&newchannel->work, vmbus_process_rescind_offer);
-
        /* Make sure this is a new offer */
        spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
@@ -341,7 +338,7 @@ static void vmbus_process_offer(struct work_struct *work)
                        if (channel->sc_creation_callback != NULL)
                                channel->sc_creation_callback(newchannel);
 
-                       goto out;
+                       goto done_init_rescind;
                }
 
                goto err_free_chan;
@@ -382,7 +379,14 @@ static void vmbus_process_offer(struct work_struct *work)
                kfree(newchannel->device_obj);
                goto err_free_chan;
        }
-out:
+done_init_rescind:
+       spin_lock_irqsave(&newchannel->lock, flags);
+       /* The next possible work is rescind handling */
+       INIT_WORK(&newchannel->work, vmbus_process_rescind_offer);
+       /* Check if rescind offer was already received */
+       if (newchannel->rescind)
+               queue_work(newchannel->controlwq, &newchannel->work);
+       spin_unlock_irqrestore(&newchannel->lock, flags);
        return;
 err_free_chan:
        free_channel(newchannel);
@@ -520,6 +524,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 {
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
+       unsigned long flags;
 
        rescind = (struct vmbus_channel_rescind_offer *)hdr;
        channel = relid2channel(rescind->child_relid);
@@ -528,11 +533,20 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                /* Just return here, no channel found */
                return;
 
+       spin_lock_irqsave(&channel->lock, flags);
        channel->rescind = true;
+       /*
+        * channel->work.func != vmbus_process_rescind_offer means we are still
+        * processing offer request and the rescind offer processing should be
+        * postponed. It will be done at the very end of vmbus_process_offer()
+        * as rescind flag is being checked there.
+        */
+       if (channel->work.func == vmbus_process_rescind_offer)
+               /* work is initialized for vmbus_process_rescind_offer() from
+                * vmbus_process_offer() where the channel got created */
+               queue_work(channel->controlwq, &channel->work);
 
-       /* work is initialized for vmbus_process_rescind_offer() from
-        * vmbus_process_offer() where the channel got created */
-       queue_work(channel->controlwq, &channel->work);
+       spin_unlock_irqrestore(&channel->lock, flags);
 }
 
 /*
-- cgit v1.2.3
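Stripped of the VMBus specifics, this is a classic deferred-event pattern:
the late-arriving event either runs the handler itself (if setup already
finished) or leaves a flag that the setup path re-checks under the same lock
just after publishing the handler, so the event is handled exactly once and
never lost. A compact sketch of the shape (my names, not the driver's):

#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool rescind;          /* event arrived while setup was running */
static bool handler_ready;    /* stands in for the work.func switch */

static void process_rescind(void) { puts("rescind processed"); }

static void on_rescind(void)  /* vmbus_onoffer_rescind() analogue */
{
        pthread_mutex_lock(&lock);
        rescind = true;
        if (handler_ready)
                process_rescind();   /* setup done: handle it now */
        /* else: the setup path will see the flag at its end */
        pthread_mutex_unlock(&lock);
}

static void process_offer(void)  /* vmbus_process_offer() analogue */
{
        /* ... long setup runs here, lock NOT held ... */
        pthread_mutex_lock(&lock);
        handler_ready = true;        /* publish handler at the very end */
        if (rescind)
                process_rescind();   /* event raced ahead of us */
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        on_rescind();                /* rescind arrives before setup ends */
        process_offer();             /* ...but is not lost */
        return 0;
}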
From d8a60e000c951f845d9a5fb3e67853e0e63a5659 Mon Sep 17 00:00:00 2001
From: kbuild test robot
Date: Mon, 26 Jan 2015 01:17:54 +0800
Subject: Drivers: hv: vmbus: hv_process_timer_expiration() can be static

drivers/hv/vmbus_drv.c:582:6: sparse: symbol 'hv_process_timer_expiration'
was not declared. Should it be static?

Signed-off-by: Fengguang Wu
Cc: Vitaly Kuznetsov
Cc: "K. Y. Srinivasan"
Cc: Haiyang Zhang
Signed-off-by: Greg Kroah-Hartman
---
 drivers/hv/vmbus_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/hv')

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 7488111ec057..35e3f422ad7b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -579,7 +579,7 @@ static void vmbus_onmessage_work(struct work_struct *work)
        kfree(ctx);
 }
 
-void hv_process_timer_expiration(struct hv_message *msg, int cpu)
+static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
 {
        struct clock_event_device *dev = hv_context.clk_evt[cpu];
 
-- cgit v1.2.3
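sparse flags symbols that are defined without a prior declaration and never
referenced from another translation unit; making them static gives them
internal linkage and documents that nothing outside the file calls them. In a
kernel tree the checker is typically run with "make C=1" (sparse is the
default checker); the same class of issue shows up in userspace builds with
-Wmissing-prototypes. A tiny illustration (my example):

/* helper.c -- compile with: cc -Wall -Wmissing-prototypes helper.c
 * Without 'static' on the helper, -Wmissing-prototypes (and sparse in a
 * kernel build) warns that the global symbol has no declaration; with
 * 'static' the function gets internal linkage and the warning goes away. */
#include <stdio.h>

static void process_timer_expiration(void)   /* file-local, like the patch */
{
        puts("timer expired");
}

int main(void)
{
        process_timer_expiration();
        return 0;
}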